git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
cmd/xfs/bmap/Makefile 1.8 Renamed to cmd/xfsprogs/bmap/Makefile
author Nathan Scott <nathans@sgi.com>
Mon, 15 Jan 2001 05:36:03 +0000 (05:36 +0000)
committer Nathan Scott <nathans@sgi.com>
Mon, 15 Jan 2001 05:36:03 +0000 (05:36 +0000)
278 files changed:
bmap/Makefile [new file with mode: 0644]
bmap/xfs_bmap.c [new file with mode: 0644]
build/Makefile [new file with mode: 0644]
build/rpm/Makefile [new file with mode: 0644]
build/rpm/macros.template [new file with mode: 0644]
build/rpm/rpm-2.rc.template [new file with mode: 0644]
build/tar/Makefile [new file with mode: 0644]
db/Makefile [new file with mode: 0644]
db/addr.c [new file with mode: 0644]
db/addr.h [new file with mode: 0644]
db/agf.c [new file with mode: 0644]
db/agf.h [new file with mode: 0644]
db/agfl.c [new file with mode: 0644]
db/agfl.h [new file with mode: 0644]
db/agi.c [new file with mode: 0644]
db/agi.h [new file with mode: 0644]
db/attr.c [new file with mode: 0644]
db/attr.h [new file with mode: 0644]
db/attrshort.c [new file with mode: 0644]
db/attrshort.h [new file with mode: 0644]
db/bit.c [new file with mode: 0644]
db/bit.h [new file with mode: 0644]
db/block.c [new file with mode: 0644]
db/block.h [new file with mode: 0644]
db/bmap.c [new file with mode: 0644]
db/bmap.h [new file with mode: 0644]
db/bmapbt.c [new file with mode: 0644]
db/bmapbt.h [new file with mode: 0644]
db/bmroot.c [new file with mode: 0644]
db/bmroot.h [new file with mode: 0644]
db/bnobt.c [new file with mode: 0644]
db/bnobt.h [new file with mode: 0644]
db/check.c [new file with mode: 0644]
db/check.h [new file with mode: 0644]
db/cntbt.c [new file with mode: 0644]
db/cntbt.h [new file with mode: 0644]
db/command.c [new file with mode: 0644]
db/command.h [new file with mode: 0644]
db/convert.c [new file with mode: 0644]
db/convert.h [new file with mode: 0644]
db/data.c [new file with mode: 0644]
db/data.h [new file with mode: 0644]
db/dbread.c [new file with mode: 0644]
db/dbread.h [new file with mode: 0644]
db/debug.c [new file with mode: 0644]
db/debug.h [new file with mode: 0644]
db/dir.c [new file with mode: 0644]
db/dir.h [new file with mode: 0644]
db/dir2.c [new file with mode: 0644]
db/dir2.h [new file with mode: 0644]
db/dir2sf.c [new file with mode: 0644]
db/dir2sf.h [new file with mode: 0644]
db/dirshort.c [new file with mode: 0644]
db/dirshort.h [new file with mode: 0644]
db/dquot.c [new file with mode: 0644]
db/dquot.h [new file with mode: 0644]
db/echo.c [new file with mode: 0644]
db/echo.h [new file with mode: 0644]
db/faddr.c [new file with mode: 0644]
db/faddr.h [new file with mode: 0644]
db/field.c [new file with mode: 0644]
db/field.h [new file with mode: 0644]
db/flist.c [new file with mode: 0644]
db/flist.h [new file with mode: 0644]
db/fprint.c [new file with mode: 0644]
db/fprint.h [new file with mode: 0644]
db/frag.c [new file with mode: 0644]
db/frag.h [new file with mode: 0644]
db/freesp.c [new file with mode: 0644]
db/freesp.h [new file with mode: 0644]
db/hash.c [new file with mode: 0644]
db/hash.h [new file with mode: 0644]
db/help.c [new file with mode: 0644]
db/help.h [new file with mode: 0644]
db/init.c [new file with mode: 0644]
db/init.h [new file with mode: 0644]
db/inobt.c [new file with mode: 0644]
db/inobt.h [new file with mode: 0644]
db/inode.c [new file with mode: 0644]
db/inode.h [new file with mode: 0644]
db/input.c [new file with mode: 0644]
db/input.h [new file with mode: 0644]
db/io.c [new file with mode: 0644]
db/io.h [new file with mode: 0644]
db/main.c [new file with mode: 0644]
db/malloc.c [new file with mode: 0644]
db/malloc.h [new file with mode: 0644]
db/mount.c [new file with mode: 0644]
db/mount.h [new file with mode: 0644]
db/output.c [new file with mode: 0644]
db/output.h [new file with mode: 0644]
db/print.c [new file with mode: 0644]
db/print.h [new file with mode: 0644]
db/quit.c [new file with mode: 0644]
db/quit.h [new file with mode: 0644]
db/sb.c [new file with mode: 0644]
db/sb.h [new file with mode: 0644]
db/sig.c [new file with mode: 0644]
db/sig.h [new file with mode: 0644]
db/strvec.c [new file with mode: 0644]
db/strvec.h [new file with mode: 0644]
db/type.c [new file with mode: 0644]
db/type.h [new file with mode: 0644]
db/uuid.c [new file with mode: 0644]
db/uuid.h [new file with mode: 0644]
db/write.c [new file with mode: 0644]
db/write.h [new file with mode: 0644]
db/xfs_admin.sh [new file with mode: 0755]
db/xfs_check.sh [new file with mode: 0755]
db/xfs_check64.sh [new file with mode: 0755]
db/xfs_ncheck.sh [new file with mode: 0755]
db/xfs_ncheck64.sh [new file with mode: 0755]
doc/Makefile [new file with mode: 0644]
doc/README.LVM [new file with mode: 0644]
fsck/Makefile [new file with mode: 0644]
fsck/xfs_fsck.c [new file with mode: 0644]
growfs/Makefile [new file with mode: 0644]
growfs/xfs_growfs.c [new file with mode: 0644]
growfs/xfs_info.sh [new file with mode: 0755]
include/Makefile [new file with mode: 0644]
include/arch.h [new file with mode: 0644]
include/builddefs.in [new file with mode: 0644]
include/buildrules [new file with mode: 0644]
include/handle.h [new file with mode: 0644]
include/jdm.h [new file with mode: 0644]
include/libxfs.h [new file with mode: 0644]
include/platform_defs.h.in [new file with mode: 0644]
include/xfs_ag.h [new file with mode: 0644]
include/xfs_alloc.h [new file with mode: 0644]
include/xfs_alloc_btree.h [new file with mode: 0644]
include/xfs_arch.h [new file with mode: 0644]
include/xfs_attr_leaf.h [new file with mode: 0644]
include/xfs_attr_sf.h [new file with mode: 0644]
include/xfs_bit.h [new file with mode: 0644]
include/xfs_bmap.h [new file with mode: 0644]
include/xfs_bmap_btree.h [new file with mode: 0644]
include/xfs_btree.h [new file with mode: 0644]
include/xfs_buf_item.h [new file with mode: 0644]
include/xfs_cred.h [new file with mode: 0644]
include/xfs_da_btree.h [new file with mode: 0644]
include/xfs_dfrag.h [new file with mode: 0644]
include/xfs_dinode.h [new file with mode: 0644]
include/xfs_dir.h [new file with mode: 0644]
include/xfs_dir2.h [new file with mode: 0644]
include/xfs_dir2_block.h [new file with mode: 0644]
include/xfs_dir2_data.h [new file with mode: 0644]
include/xfs_dir2_leaf.h [new file with mode: 0644]
include/xfs_dir2_node.h [new file with mode: 0644]
include/xfs_dir2_sf.h [new file with mode: 0644]
include/xfs_dir_leaf.h [new file with mode: 0644]
include/xfs_dir_sf.h [new file with mode: 0644]
include/xfs_dqblk.h [new file with mode: 0644]
include/xfs_dquot_item.h [new file with mode: 0644]
include/xfs_extfree_item.h [new file with mode: 0644]
include/xfs_fs.h [new file with mode: 0644]
include/xfs_ialloc.h [new file with mode: 0644]
include/xfs_ialloc_btree.h [new file with mode: 0644]
include/xfs_imap.h [new file with mode: 0644]
include/xfs_inode.h [new file with mode: 0644]
include/xfs_inode_item.h [new file with mode: 0644]
include/xfs_inum.h [new file with mode: 0644]
include/xfs_log.h [new file with mode: 0644]
include/xfs_log_priv.h [new file with mode: 0644]
include/xfs_log_recover.h [new file with mode: 0644]
include/xfs_mount.h [new file with mode: 0644]
include/xfs_quota.h [new file with mode: 0644]
include/xfs_rtalloc.h [new file with mode: 0644]
include/xfs_sb.h [new file with mode: 0644]
include/xfs_trans.h [new file with mode: 0644]
include/xfs_trans_space.h [new file with mode: 0644]
include/xfs_types.h [new file with mode: 0644]
include/xqm.h [new file with mode: 0644]
libxfs/Makefile [new file with mode: 0644]
libxfs/init.c [new file with mode: 0644]
libxfs/logitem.c [new file with mode: 0644]
libxfs/rdwr.c [new file with mode: 0644]
libxfs/trans.c [new file with mode: 0644]
libxfs/util.c [new file with mode: 0644]
libxfs/xfs.h [new file with mode: 0644]
libxfs/xfs_alloc.c [new file with mode: 0644]
libxfs/xfs_alloc_btree.c [new file with mode: 0644]
libxfs/xfs_attr_leaf.c [new file with mode: 0644]
libxfs/xfs_bit.c [new file with mode: 0644]
libxfs/xfs_bmap.c [new file with mode: 0644]
libxfs/xfs_bmap_btree.c [new file with mode: 0644]
libxfs/xfs_btree.c [new file with mode: 0644]
libxfs/xfs_da_btree.c [new file with mode: 0644]
libxfs/xfs_dir.c [new file with mode: 0644]
libxfs/xfs_dir2.c [new file with mode: 0644]
libxfs/xfs_dir2_block.c [new file with mode: 0644]
libxfs/xfs_dir2_data.c [new file with mode: 0644]
libxfs/xfs_dir2_leaf.c [new file with mode: 0644]
libxfs/xfs_dir2_node.c [new file with mode: 0644]
libxfs/xfs_dir2_sf.c [new file with mode: 0644]
libxfs/xfs_dir_leaf.c [new file with mode: 0644]
libxfs/xfs_ialloc.c [new file with mode: 0644]
libxfs/xfs_ialloc_btree.c [new file with mode: 0644]
libxfs/xfs_inode.c [new file with mode: 0644]
libxfs/xfs_mount.c [new file with mode: 0644]
libxfs/xfs_rtalloc.c [new file with mode: 0644]
libxfs/xfs_rtbit.c [new file with mode: 0644]
libxfs/xfs_trans.c [new file with mode: 0644]
logprint/Makefile [new file with mode: 0644]
logprint/log_misc.c [new file with mode: 0644]
logprint/log_print_all.c [new file with mode: 0644]
logprint/log_print_trans.c [new file with mode: 0644]
logprint/logprint.c [new file with mode: 0644]
logprint/logprint.h [new file with mode: 0644]
man/Makefile [new file with mode: 0644]
man/man5/Makefile [new file with mode: 0644]
man/man5/xfs.5 [new file with mode: 0644]
man/man8/Makefile [new file with mode: 0644]
man/man8/fsck.xfs.8 [new file with mode: 0644]
man/man8/mkfs.xfs.8 [new file with mode: 0644]
man/man8/xfs_admin.8 [new file with mode: 0644]
man/man8/xfs_bmap.8 [new file with mode: 0644]
man/man8/xfs_check.8 [new file with mode: 0644]
man/man8/xfs_db.8 [new file with mode: 0644]
man/man8/xfs_growfs.8 [new file with mode: 0644]
man/man8/xfs_logprint.8 [new file with mode: 0644]
man/man8/xfs_mkfile.8 [new file with mode: 0644]
man/man8/xfs_ncheck.8 [new file with mode: 0644]
man/man8/xfs_repair.8 [new file with mode: 0644]
mkfile/Makefile [new file with mode: 0644]
mkfile/xfs_mkfile.c [new file with mode: 0644]
mkfs/Makefile [new file with mode: 0644]
mkfs/maxtrres.c [new file with mode: 0644]
mkfs/proto.c [new file with mode: 0644]
mkfs/proto.h [new file with mode: 0644]
mkfs/xfs_mkfs.c [new file with mode: 0644]
mkfs/xfs_mkfs.h [new file with mode: 0644]
repair/Makefile [new file with mode: 0644]
repair/README [new file with mode: 0644]
repair/agheader.c [new file with mode: 0644]
repair/agheader.h [new file with mode: 0644]
repair/attr_repair.c [new file with mode: 0644]
repair/attr_repair.h [new file with mode: 0644]
repair/avl.c [new file with mode: 0644]
repair/avl.h [new file with mode: 0644]
repair/avl64.c [new file with mode: 0644]
repair/avl64.h [new file with mode: 0644]
repair/bmap.c [new file with mode: 0644]
repair/bmap.h [new file with mode: 0644]
repair/dino_chunks.c [new file with mode: 0644]
repair/dinode.c [new file with mode: 0644]
repair/dinode.h [new file with mode: 0644]
repair/dir.c [new file with mode: 0644]
repair/dir.h [new file with mode: 0644]
repair/dir2.c [new file with mode: 0644]
repair/dir2.h [new file with mode: 0644]
repair/dir_stack.c [new file with mode: 0644]
repair/dir_stack.h [new file with mode: 0644]
repair/err_protos.h [new file with mode: 0644]
repair/globals.c [new file with mode: 0644]
repair/globals.h [new file with mode: 0644]
repair/incore.c [new file with mode: 0644]
repair/incore.h [new file with mode: 0644]
repair/incore_bmc.c [new file with mode: 0644]
repair/incore_ext.c [new file with mode: 0644]
repair/incore_ino.c [new file with mode: 0644]
repair/init.c [new file with mode: 0644]
repair/io.c [new file with mode: 0644]
repair/phase1.c [new file with mode: 0644]
repair/phase2.c [new file with mode: 0644]
repair/phase3.c [new file with mode: 0644]
repair/phase4.c [new file with mode: 0644]
repair/phase5.c [new file with mode: 0644]
repair/phase6.c [new file with mode: 0644]
repair/phase7.c [new file with mode: 0644]
repair/protos.h [new file with mode: 0644]
repair/rt.c [new file with mode: 0644]
repair/rt.h [new file with mode: 0644]
repair/sb.c [new file with mode: 0644]
repair/scan.c [new file with mode: 0644]
repair/scan.h [new file with mode: 0644]
repair/versions.c [new file with mode: 0644]
repair/versions.h [new file with mode: 0644]
repair/xfs_repair.c [new file with mode: 0644]

diff --git a/bmap/Makefile b/bmap/Makefile
new file mode 100644 (file)
index 0000000..741ae56
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# Makefile for the xfs_bmap command.
+# TOPDIR is the relative path to the top of the source tree; the shared
+# build definitions are included from there.
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# The command built in this directory and its single C source file
+# (presumably consumed by the rules in $(BUILDRULES) -- confirm there).
+CMDTARGET = xfs_bmap
+CFILES = xfs_bmap.c
+
+# The default target just builds the command.
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+# install: build first, then run $(INSTALL) with -d on the destination
+# directory and again to place the command there, both with mode 755.
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/bmap/xfs_bmap.c b/bmap/xfs_bmap.c
new file mode 100644 (file)
index 0000000..bd594a7
--- /dev/null
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* 
+ * Bmap display utility for xfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libxfs.h>
+
+/* Command-line state, set once by main() and read by dofile(). */
+int aflag = 0; /* -a: attribute fork (also sets BMV_IF_ATTRFORK) */
+int lflag = 0; /* -l: list number of blocks with each extent */
+int nflag = 0; /* -n nx: number of extents specified; 0 = not given */
+int vflag = 0; /* -v: verbose output; each repetition raises the level */
+int bmv_iflags = 0;    /* Input flags for XFS_IOC_GETBMAPX */
+
+/* Print the extent map of one file; returns 0 on success, 1 on failure. */
+int dofile(char *);
+/* Size of the file open on fd, or -1 if it cannot be determined. */
+__off64_t file_size(int fd, char * fname);
+/* Number of decimal digits needed to print a value (at least 1). */
+int numlen(__off64_t);
+
+/*
+ * Print the usage message for progname on stderr and exit with status 1.
+ */
+static void
+bmap_usage(const char *progname)
+{
+       fprintf(stderr, "Usage: %s [-adlpvV] [-n nx] file...\n", progname);
+       exit(1);
+}
+
+/*
+ * Parse the command line, then print the block map of every file named
+ * on it.
+ *
+ * Options:
+ *   -a     map the attribute fork (BMV_IF_ATTRFORK)
+ *   -d     do not recall possibly offline DMAPI files
+ *   -l     list the number of blocks with each extent
+ *   -n nx  report at most nx extents
+ *   -p     report unwritten preallocated blocks
+ *   -v     verbose output; may be repeated
+ *   -V     print the version string
+ *
+ * Returns 0 if dofile() succeeded for every file, 1 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+       char    *fname;
+       char    *end;
+       long    nx;
+       int     i = 0;
+       int     option;
+
+       fname = basename(argv[0]);
+       while ((option = getopt(argc, argv, "adln:pvV")) != -1) {
+               switch (option) {
+               case 'a':
+                       bmv_iflags |= BMV_IF_ATTRFORK;
+                       aflag = 1;
+                       break;
+               case 'l':
+                       lflag = 1;
+                       break;
+               case 'n':
+                       /*
+                        * atoi() cannot report errors, so garbage or a
+                        * negative count used to slip through and size
+                        * the extent buffer in dofile().  dofile() needs
+                        * nx + 1 entries, so nx + 1 must fit in an int.
+                        */
+                       errno = 0;
+                       nx = strtol(optarg, &end, 10);
+                       if (errno != 0 || end == optarg || *end != '\0' ||
+                           nx < 0 || nx > 0x7ffffffeL)
+                               bmap_usage(fname);
+                       nflag = (int)nx;
+                       break;
+               case 'd':
+               /* do not recall possibly offline DMAPI files */
+                       bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+                       break;
+               case 'p':
+               /* report unwritten preallocated blocks */
+                       bmv_iflags |= BMV_IF_PREALLOC;
+                       break;
+               case 'v':
+                       vflag++;
+                       break;
+               case 'V':
+                       printf("%s version %s\n", fname, VERSION);
+                       break;
+               default:
+                       bmap_usage(fname);
+               }
+       }
+       /* -d and -p are dropped when the attribute fork is being mapped */
+       if (aflag)
+               bmv_iflags &=  ~(BMV_IF_PREALLOC|BMV_IF_NO_DMAPI_READ);
+       /* i counts the files that failed */
+       while (optind < argc) {
+               fname = argv[optind];
+               i += dofile(fname);
+               optind++;
+       }
+       return(i ? 1 : 0);
+}
+
+/*
+ * Return the size in bytes of the file open on fd.
+ *
+ * fname is used only in the diagnostic.  If fstat64 fails, one line
+ * describing the failure is written to stderr and -1 is returned; in
+ * that case errno is put back to the value it had on entry, so a caller
+ * that is still reporting an earlier error does not lose it.
+ */
+__off64_t
+file_size(int  fd, char *fname)
+{
+       struct  stat64  st;
+       char            mbuf[1024];
+       int             errno_save;
+       int             fstat_errno;
+
+       errno_save = errno;     /* in case fstat64 fails */
+       if (fstat64(fd, &st) < 0) {
+               /*
+                * Build the whole message first and hand it to perror in
+                * one go: the old fprintf + perror pair printed
+                * "...for NAMEfstat64: ..." and could clobber errno
+                * between the two calls.
+                */
+               fstat_errno = errno;
+               snprintf(mbuf, sizeof(mbuf), "fstat64 failed for %s", fname);
+               errno = fstat_errno;
+               perror(mbuf);
+               errno = errno_save;
+               return -1;
+       }
+       return st.st_size;
+}
+       
+
+/* Minimum column widths of the verbose (-v) table. */
+#define MINRANGE_WIDTH 16
+#define MINAG_WIDTH    2
+#define MINTOT_WIDTH   5
+#define        max(a,b)        (((a) > (b)) ? (a) : (b))
+
+/*
+ * Size of the scratch buffers that hold one formatted range.  Two signed
+ * 64-bit values plus punctuation need up to 46 bytes, which did not fit
+ * in the 32-byte buffers used previously.
+ */
+#define RANGE_BUFLEN   64
+
+/*
+ * Format one extent for the verbose table.
+ *
+ * rbuf always receives the file-offset range.  For a mapped extent bbuf
+ * receives the block range, *agno the allocation group number and abuf
+ * the range relative to the start of that group; bbperag is the number
+ * of basic blocks per allocation group.  All three buffers must be
+ * RANGE_BUFLEN bytes long.
+ *
+ * Returns 1 if the extent is a hole (bbuf, abuf and *agno are then left
+ * untouched), 0 otherwise.
+ */
+static int
+format_extent(const struct getbmapx *ext, long long bbperag,
+             char *rbuf, char *bbuf, char *abuf, int *agno)
+{
+       long long       off = ext->bmv_offset;
+       long long       len = ext->bmv_length;
+       long long       blk = ext->bmv_block;
+       long long       agoff;
+
+       snprintf(rbuf, RANGE_BUFLEN, "[%lld..%lld]:", off, off + len - 1LL);
+       if (blk == -1)
+               return 1;
+       snprintf(bbuf, RANGE_BUFLEN, "%lld..%lld", blk, blk + len - 1LL);
+       *agno = blk / bbperag;
+       agoff = blk - (*agno * bbperag);
+       snprintf(abuf, RANGE_BUFLEN, "(%lld..%lld)", agoff, agoff + len - 1LL);
+       return 0;
+}
+
+/*
+ * Print the extents in the short format, one per line:
+ *   extent: [startoffset..endoffset]: startblock..endblock
+ * with " N blocks" appended when -l was given.  map[0] is the header;
+ * the extents are map[1] .. map[bmv_entries].
+ */
+static void
+print_extents_brief(const struct getbmapx *map)
+{
+       long long       off, len, blk;
+       int             i;
+
+       for (i = 0; i < map->bmv_entries; i++) {
+               off = map[i + 1].bmv_offset;
+               len = map[i + 1].bmv_length;
+               blk = map[i + 1].bmv_block;
+
+               printf("\t%d: [%lld..%lld]: ", i, off, off + len - 1LL);
+               if (blk == -1)
+                       printf("hole");
+               else
+                       printf("%lld..%lld", blk, blk + len - 1LL);
+               if (lflag)
+                       printf(" %lld blocks\n", len);
+               else
+                       printf("\n");
+       }
+}
+
+/*
+ * Print the extents as a table:
+ *   extent: [startoffset..endoffset]: startblock..endblock \
+ *      ag# (agoffset..agendoffset) totalbbs
+ * fsgeo supplies the allocation group size and count.  A first pass
+ * over the extents works out the column widths, a second pass prints.
+ */
+static void
+print_extents_verbose(const struct getbmapx *map, const xfs_fsop_geom_t *fsgeo)
+{
+       int             i, w, hole;
+       int             agno = 0;
+       int             foff_w, boff_w, aoff_w, tot_w, agno_w;
+       long long       bbperag;
+       char            rbuf[RANGE_BUFLEN];
+       char            bbuf[RANGE_BUFLEN];
+       char            abuf[RANGE_BUFLEN];
+
+       foff_w = boff_w = aoff_w = MINRANGE_WIDTH;
+       tot_w = MINTOT_WIDTH;
+       bbperag = (long long)fsgeo->agblocks *
+                 (long long)fsgeo->blocksize / BBSIZE;
+
+       /*
+        * Go through the extents and figure out the width
+        * needed for all columns.
+        */
+       for (i = 0; i < map->bmv_entries; i++) {
+               hole = format_extent(&map[i + 1], bbperag,
+                                    rbuf, bbuf, abuf, &agno);
+               w = (int)strlen(rbuf);
+               foff_w = max(foff_w, w);
+               w = numlen(map[i + 1].bmv_length);
+               tot_w = max(tot_w, w);
+               if (hole)
+                       continue;
+               w = (int)strlen(bbuf);
+               boff_w = max(boff_w, w);
+               w = (int)strlen(abuf);
+               aoff_w = max(aoff_w, w);
+       }
+       w = numlen(fsgeo->agcount);
+       agno_w = max(MINAG_WIDTH, w);
+
+       printf("%4s: %-*s %-*s %*s %-*s %*s\n",
+               "EXT",
+               foff_w, "FILE-OFFSET",
+               boff_w, "BLOCK-RANGE",
+               agno_w, "AG",
+               aoff_w, "AG-OFFSET",
+               tot_w, "TOTAL");
+       for (i = 0; i < map->bmv_entries; i++) {
+               hole = format_extent(&map[i + 1], bbperag,
+                                    rbuf, bbuf, abuf, &agno);
+               if (hole) {
+                       printf("%4d: %-*s %-*s %*s %-*s %*lld\n",
+                               i,
+                               foff_w, rbuf,
+                               boff_w, "hole",
+                               agno_w, "",
+                               aoff_w, "",
+                               tot_w, (long long)map[i + 1].bmv_length);
+               } else {
+                       printf("%4d: %-*s %-*s %*d %-*s %*lld\n",
+                               i,
+                               foff_w, rbuf,
+                               boff_w, bbuf,
+                               agno_w, agno,
+                               aoff_w, abuf,
+                               tot_w, (long long)map[i + 1].bmv_length);
+               }
+       }
+}
+
+/*
+ * Print the extent map of the file named fname on stdout.
+ *
+ * The file is opened read-only and queried with XFS_IOC_GETBMAPX using
+ * the global bmv_iflags.  With -v the filesystem geometry and the file's
+ * extended attributes are fetched first so that allocation group
+ * information can be shown; for realtime files that information does not
+ * apply and the short format is used instead.
+ *
+ * Returns 0 on success (including a file with no extents) and 1 on any
+ * failure, after writing a diagnostic to stderr.
+ */
+int
+dofile(char *fname)
+{
+       int             fd;
+       struct fsxattr  fsx;
+       int             i;
+       int             err;
+       struct getbmapx *map;
+       struct getbmapx *nmap;
+       char            mbuf[1024];
+       int             map_size;
+       int             loop = 0;
+       int             verbose = vflag;        /* per-file copy, see below */
+       xfs_fsop_geom_t fsgeo;
+
+       /* a negative count would size the extent buffer at zero or less */
+       if (nflag < 0) {
+               fprintf(stderr, "%s: invalid extent count %d\n", fname, nflag);
+               return 1;
+       }
+
+       fd = open(fname, O_RDONLY);
+       if (fd < 0) {
+               snprintf(mbuf, sizeof(mbuf), "open %s", fname);
+               perror(mbuf);
+               return 1;
+       }
+
+       if (verbose) {
+               if (ioctl(fd, XFS_IOC_FSGEOMETRY, &fsgeo) < 0) {
+                       snprintf(mbuf, sizeof(mbuf),
+                                "Can't get XFS geom, %s", fname);
+                       perror(mbuf);
+                       close(fd);
+                       return 1;
+               }
+
+               if (verbose > 1)
+                       printf(
+       "xfs_bmap: fsgeo.agblocks=%u, fsgeo.blocksize=%u, fsgeo.agcount=%u\n",
+                                       fsgeo.agblocks, fsgeo.blocksize,
+                                       fsgeo.agcount);
+
+               if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) {
+                       snprintf(mbuf, sizeof(mbuf),
+                                "Can't read attrs %s", fname);
+                       perror(mbuf);
+                       close(fd);
+                       return 1;
+               }
+
+               if (verbose > 1)
+                       printf(
+    "xfs_bmap: fsx.fsx_xflags=%u, fsx.fsx_extsize=%u, fsx.fsx_nextents=%u\n",
+                                       fsx.fsx_xflags, fsx.fsx_extsize,
+                                       fsx.fsx_nextents);
+
+               if (fsx.fsx_xflags & XFS_XFLAG_REALTIME) {
+                       /*
+                        * ag info not applicable to rt, continue
+                        * without ag output.  fsx_xflags is a bit mask,
+                        * so test the bit rather than comparing the
+                        * whole word, and only switch off the local
+                        * copy so that later files stay verbose.
+                        */
+                       verbose = 0;
+               }
+       }
+
+       map_size = nflag ? nflag+1 : 32;        /* initial guess */
+       map = malloc(map_size*sizeof(*map));
+       if (map == NULL) {
+               fprintf(stderr, "malloc of %lu bytes failed.\n",
+                               (unsigned long)(map_size*sizeof(*map)));
+               close(fd);
+               return 1;
+       }
+
+       /*
+        * Try the ioctl(XFS_IOC_GETBMAPX) for the number of extents
+        * specified by nflag, or the initial guess number of extents (32).
+        *
+        * If there are more extents than we guessed, use ioctl
+        * (XFS_IOC_FSGETXATTR[A]) to get the extent count, realloc some
+        * more space based on this count, and try again.
+        *
+        * If the initial XFS_IOC_GETBMAPX attempt returns EINVAL, this may
+        * mean that we tried it on a zero length file.  If we get EINVAL,
+        * check the length with file_size() and report "no extents" if the
+        * length == 0.
+        *
+        * Why not do the ioctl(XFS_IOC_FSGETXATTR[A]) first?  Two reasons:
+        * (1)  The extent count may be wrong for a file with delayed
+        *      allocation blocks.  The XFS_IOC_GETBMAPX forces the real
+        *      allocation and fixes up the extent count.
+        * (2)  For XFS_IOC_GETBMAP[X] on a DMAPI file that has been moved
+        *      offline by a DMAPI application (e.g., DMF) the
+        *      XFS_IOC_FSGETXATTR only reflects the extents actually
+        *      online.  Doing the XFS_IOC_GETBMAPX call first forces the
+        *      data blocks online and then everything proceeds normally
+        *      (see PV #545725).
+        *
+        *      If you don't want this behavior on a DMAPI offline file,
+        *      try the "-d" option which sets the BMV_IF_NO_DMAPI_READ
+        *      iflag for XFS_IOC_GETBMAPX.
+        */
+
+       do {    /* loop a maximum of two times */
+
+               bzero(map, sizeof(*map));       /* zero header */
+
+               map->bmv_length = -1;
+               map->bmv_count = map_size;
+               map->bmv_iflags = bmv_iflags;
+
+               i = ioctl(fd, XFS_IOC_GETBMAPX, map);
+               err = errno;    /* the debug printf below may change errno */
+
+               if (verbose > 1)
+                       printf(
+               "xfs_bmap: i=%d map.bmv_offset=%lld, map.bmv_block=%lld, "
+               "map.bmv_length=%lld, map.bmv_count=%d, map.bmv_entries=%d\n",
+                                       i, (long long)map->bmv_offset,
+                                       (long long)map->bmv_block,
+                                       (long long)map->bmv_length,
+                                       map->bmv_count, map->bmv_entries);
+               if (i < 0) {
+                       if (   err == EINVAL
+                           && !aflag && file_size(fd, fname) == 0) {
+                               break;
+                       } else  {
+                               snprintf(mbuf, sizeof(mbuf),
+                                       "ioctl(XFS_IOC_GETBMAPX (iflags 0x%x) %s",
+                                       map->bmv_iflags, fname);
+                               errno = err;
+                               perror(mbuf);
+                               close(fd);
+                               free(map);
+                               return 1;
+                       }
+               }
+               if (nflag)
+                       break;
+               if (map->bmv_entries < map->bmv_count-1)
+                       break;
+               /* Get number of extents from ioctl XFS_IOC_FSGETXATTR[A]
+                * syscall.
+                */
+               i = ioctl(fd, aflag ? XFS_IOC_FSGETXATTRA : XFS_IOC_FSGETXATTR, &fsx);
+               if (i < 0) {
+                       snprintf(mbuf, sizeof(mbuf),
+                               "ioctl(XFS_IOC_FSGETXATTR%s) %s",
+                               aflag ? "A" : "", fname);
+                       perror(mbuf);
+                       close(fd);
+                       free(map);
+                       return 1;
+               }
+               if (fsx.fsx_nextents >= map_size-1) {
+                       map_size = 2*(fsx.fsx_nextents+1);
+                       /*
+                        * Keep the old pointer until realloc has
+                        * succeeded so it can still be freed on failure.
+                        */
+                       nmap = realloc(map, map_size*sizeof(*map));
+                       if (nmap == NULL) {
+                               fprintf(stderr,"cannot realloc %lu bytes.\n",
+                                       (unsigned long)(map_size*sizeof(*map)));
+                               free(map);
+                               close(fd);
+                               return 1;
+                       }
+                       map = nmap;
+               }
+       } while (++loop < 2);
+       if (!nflag) {
+               if (map->bmv_entries <= 0) {
+                       printf("%s: no extents\n", fname);
+                       close(fd);
+                       free(map);
+                       return 0;
+               }
+       }
+       close(fd);
+       printf("%s:\n", fname);
+       if (!verbose)
+               print_extents_brief(map);
+       else
+               print_extents_verbose(map, &fsgeo);
+       free(map);
+       return 0;
+}
+
+/*
+ * Return the number of decimal digits needed to print val.
+ * Zero and negative values count as a single digit.
+ */
+int
+numlen( __off64_t val)
+{
+       int digits = 1;
+
+       /* every further power of ten adds one digit */
+       while (val >= 10) {
+               val /= 10;
+               digits++;
+       }
+       return digits;
+}
diff --git a/build/Makefile b/build/Makefile
new file mode 100644 (file)
index 0000000..7e336de
--- /dev/null
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# Packaging makefile: nothing is compiled here, the rules below only
+# produce the source manifest/tarball and drive the tar and rpm sub-makes.
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# MANIFEST is the generated list of source files; SRCTAR is the compressed
+# source tarball built from it and SRCTAR2 is a second name for the same
+# tarball (created as a symlink by the $(MANIFEST) rule).
+MANIFEST=src-manifest
+SRCTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).src.tar.gz
+SRCTAR2=$(PACKAGE_NAME)_$(PACKAGE_VERSION).orig.tar.gz
+
+# Generated files, including the product-version symlink made in $(TOPDIR).
+# NOTE(review): presumably consumed by the clean rules in $(BUILDRULES).
+LDIRT = $(MANIFEST) $(SRCTAR) $(SRCTAR2) bin-manifest $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION)
+
+# for clean and clobber
+SUBDIRS = tar rpm deb
+
+# nothing to build here (it's all packaging)
+default install :
+
+include $(BUILDRULES)
+
+# Symlink in the TOPDIR is used to pack files relative to
+# product-version directory.
+#
+# Steps performed by this rule:
+#   1. create the $(PACKAGE_NAME)-$(PACKAGE_VERSION) symlink in $(TOPDIR)
+#      if it does not exist yet;
+#   2. run "make source" in $(TOPDIR) and rewrite the leading "." of every
+#      output line to the product-version prefix, writing the manifest;
+#   3. tar and compress the files listed in the manifest into $(SRCTAR)
+#      and symlink $(SRCTAR2) to it.
+# NOTE(review): the "$$?" check follows a pipeline, so it sees the exit
+# status of sed rather than of "$(MAKE) source" -- confirm this is intended.
+$(MANIFEST) : $(_FORCE)
+       @if [ ! -L $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ] ; then \
+           $(LN_S) . $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ; \
+       fi
+       @CDIR=`pwd`; cd $(TOPDIR); \
+       $(MAKE) --no-print-directory source | \
+           sed -e 's/^\./$(PACKAGE_NAME)-$(PACKAGE_VERSION)/' > $$CDIR/$@ ;\
+       if [ $$?  -ne 0 ] ; then  \
+           exit 1; \
+       else \
+           unset TAPE; \
+           $(TAR) -T $$CDIR/$@ -cf - | $(ZIP) --best > $$CDIR/$(SRCTAR); \
+           $(LN_S) $$CDIR/$(SRCTAR) $$CDIR/$(SRCTAR2); \
+       fi
+
+# Build the binary distribution.  DIST_MANIFEST (bin-manifest in this
+# directory) and DIST_ROOT (a per-process scratch directory under /tmp) are
+# exported for the top-level "install" run; afterwards the tar and rpm
+# sub-makes are run when the corresponding tool is executable.
+# The scratch DIST_ROOT is removed at the end unless KEEP_DIST_ROOT is set
+# to a non-empty value.
+dist : default $(MANIFEST)
+       @DIST_MANIFEST=`pwd`/bin-manifest; DIST_ROOT=/tmp/$$$$; \
+       export DIST_MANIFEST DIST_ROOT; \
+       rm -f $$DIST_MANIFEST; \
+       echo === install === && $(MAKE) -C $(TOPDIR) install || exit $$?; \
+       if [ -x $(TAR) ]; then \
+           ( echo "=== tar ===" && $(MAKEF) -C tar $@ || exit $$? ); \
+       fi; \
+       if [ -x $(RPM) ]; then \
+           ( echo "=== rpm ===" && $(MAKEF) -C rpm $@ || exit $$? ); \
+       fi; \
+       test -n "$$KEEP_DIST_ROOT" || rm -rf $$DIST_ROOT; echo Done
diff --git a/build/rpm/Makefile b/build/rpm/Makefile
new file mode 100644 (file)
index 0000000..2625370
--- /dev/null
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# TREEROOT is the absolute path of $(TOPDIR); it is substituted for
+# %topdir% when the rpmmacros file is generated.
+TOPDIR = ../..
+TREEROOT = $(shell cd ${TOPDIR}; pwd)
+include $(TOPDIR)/include/builddefs
+
+# Spec file generated from $(SPECF).in by the rule at the end of this file.
+SPECF = xfs-cmds.spec
+
+# Files generated in this directory: built rpm packages, the spec file and
+# the rpm configuration files.
+# NOTE(review): presumably consumed by the clean rules in $(BUILDRULES).
+LDIRT = $(PACKAGE_NAME)-$(PACKAGE_VERSION)-[0-9]*.*.rpm $(SPECF) \
+       rpmmacros rpm-$(RPM_VERSION).rc
+
+# Templates the generated files are produced from.
+LSRCFILES = macros.template $(SPECF).in rpm-2.rc.template
+
+# Nothing to build or install here; only the dist rule does any work.
+default install :
+
+include $(BUILDRULES)
+
+# generate a binary rpm file
+# Runs $(RPM) on the generated spec file using the locally generated rc
+# file, so both must be up to date first.
+dist : default $(SPECF) rpm-$(RPM_VERSION).rc
+       $(RPM) -ba --rcfile ./rpm-$(RPM_VERSION).rc $(SPECF)
+
+# Because rpm prior to v.2.90 does not support macros and old style config
+# is not supported by rpm v.3, we have to resort to such ugly hacks
+#
+# For any RPM_VERSION other than 2 the rc file is derived from the system
+# rpmrc with the macro file path redirected to the local ./rpmmacros, which
+# in turn is generated from macros.template.  For RPM_VERSION 2 the rc file
+# is generated directly from rpm-2.rc.template.
+# The variable must be written $(RPM_VERSION): a bare $RPM_VERSION is read
+# by make as $(R) followed by the literal text "PM_VERSION", which never
+# equals 2.
+ifneq ($(RPM_VERSION),2)
+rpm-$(RPM_VERSION).rc : rpmmacros
+       sed -e '/^macrofiles:/s|~/.rpmmacros|./rpmmacros|' </usr/lib/rpm/rpmrc >$@
+
+rpmmacros : macros.template
+       @sed -e 's|%topdir%|$(TREEROOT)|g' < $< > $@
+else
+rpm-2.rc: rpm-2.rc.template
+       @sed -e 's|%topdir%|$(TOPDIR)|g' < $< > $@
+endif
+
+# Generate the spec file from its .in template by replacing the @name@
+# placeholders with the corresponding make variables.  A "BuildRoot:" line
+# left with no value after substitution is deleted.  The target is declared
+# .PHONY, so the spec file is regenerated on every invocation.
+.PHONY: $(SPECF)
+${SPECF} : ${SPECF}.in
+       sed -e's|@package_name@|$(PACKAGE_NAME)|g' \
+           -e's|@package_version@|$(PACKAGE_VERSION)|g' \
+           -e's|@package_release@|$(PACKAGE_RELEASE)|g' \
+           -e's|@package_distribution@|$(PACKAGE_DISTRIBUTION)|g' \
+           -e's|@package_builder@|$(PACKAGE_BUILDER)|g' \
+           -e's|@build_root@|$(DIST_ROOT)|g' \
+           -e'/^BuildRoot: *$$/d' \
+           -e's|@xfs_cmds_var_dir@|$(XFS_CMDS_VAR_DIR)|g' \
+           -e's|@xfs_cmds_share_dir@|$(XFS_CMDS_SHARE_DIR)|g' \
+           -e's|@xfs_cmds_log_dir@|$(XFS_CMDS_LOG_DIR)|g' \
+           -e's|@xfs_cmds_tmp_dir@|$(XFS_CMDS_TMP_DIR)|g' \
+           -e's|@make@|$(MAKE)|g' < $< > $@
diff --git a/build/rpm/macros.template b/build/rpm/macros.template
new file mode 100644 (file)
index 0000000..6ab46e1
--- /dev/null
@@ -0,0 +1,30 @@
+#
+# macros.template
+#
+# Template to fudge rpm directory structure inside IRIX-like build
+# environment
+
+# Force 386 build on all platforms
+%_target i386-pc-linux
+%_target_cpu i386
+%_target_os linux
+
+# topdir == $(WORKAREA)
+%_topdir %topdir%
+
+# Following directories are specific to the topdir
+# This is where build is done. In our case it's the same as $WORKAREA
+%_builddir %topdir%
+
+# This is where foo.1.99.tar.gz is living in the real world.
+# Be careful not to run full rpm build as it will override the sources 
+%_sourcedir %topdir%/build
+
+# This is where binary RPM and source RPM would end up
+%_rpmdir    %topdir%/build/rpm
+%_srcrpmdir %topdir%/build/rpm
+%_specdir   %topdir%/build/rpm
+
+# Leave RPM files in the same directory - we're not building for 
+# multiple architectures
+%_rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm
diff --git a/build/rpm/rpm-2.rc.template b/build/rpm/rpm-2.rc.template
new file mode 100644 (file)
index 0000000..f3b3eba
--- /dev/null
@@ -0,0 +1,25 @@
+#
+# rpmrc.template
+#
+# Template to fudge rpm directory structure inside IRIX-like build
+# environment
+
+# topdir == $(WORKAREA)
+topdir: %topdir%
+
+# Following directories are specific to the topdir
+# This is where build is done. In our case it's the same as $WORKAREA
+# Be careful not to run full rpm build as it will override the sources 
+builddir: %topdir%
+
+# This is where foo.1.99.tar.gz is living in the real world.
+sourcedir: %topdir%/build
+
+# This is where binary RPM and source RPM would end up
+rpmdir:    %topdir%/build/rpm
+srcrpmdir:  %topdir%/build/rpm
+specdir:   %topdir%/build/rpm
+
+# Leave RPM files in the same directory - we're not building for 
+# multiple architectures
+rpmfilename: %{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}.rpm
diff --git a/build/tar/Makefile b/build/tar/Makefile
new file mode 100644 (file)
index 0000000..e010d97
--- /dev/null
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+# Name of the binary tarball written by the dist rule below.
+BINTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz
+LDIRT = $(BINTAR)
+
+# Nothing to build or install here; only the dist rule does any work.
+default install :
+
+include $(BUILDRULES)
+
+# Create the binary tarball from ../bin-manifest.  The manifest is sorted
+# and de-duplicated, then reduced to a list of paths: field 6 of "f"
+# records and field 3 of "l" records are emitted with a leading ".", and
+# "d" records are skipped.  tar runs from $DIST_ROOT (or / when it is
+# unset or empty), so the listed paths are taken relative to that root.
+dist : default
+       @HERE=`pwd`; cd $${DIST_ROOT:-/}; \
+       sort $$HERE/../bin-manifest | uniq | $(AWK) ' \
+               $$1 == "f" { printf (".%s\n", $$6); } \
+               $$1 == "d" { next; } \
+               $$1 == "l" { printf (".%s\n", $$3); }' \
+       | $(TAR) -T - -cf - | $(ZIP) --best > $$HERE/$(BINTAR)
+       @echo Wrote: `pwd`/$(BINTAR)
diff --git a/db/Makefile b/db/Makefile
new file mode 100644 (file)
index 0000000..74cacba
--- /dev/null
@@ -0,0 +1,58 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# The xfs_db binary, which depends on and links against libxfs.
+CMDTARGET = xfs_db
+CMDDEPS = $(LIBXFS)
+
+# Every header listed here has a matching .c file (see CFILES below);
+# main.c is the only source without a header.
+HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \
+       bmapbt.h bmroot.h bnobt.h check.h cntbt.h command.h convert.h data.h \
+       dbread.h debug.h dir.h dir2.h dir2sf.h dirshort.h dquot.h echo.h \
+       faddr.h field.h flist.h fprint.h frag.h freesp.h hash.h help.h \
+       init.h inobt.h inode.h input.h io.h malloc.h mount.h output.h \
+       print.h quit.h sb.h uuid.h sig.h strvec.h type.h write.h
+CFILES = $(HFILES:.h=.c) main.c
+# Shell scripts installed by the install rule under their names without
+# the .sh suffix.
+LSRCFILES = xfs_admin.sh xfs_check.sh xfs_ncheck.sh
+LLDLIBS        = $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+# Create the binary directory, then install xfs_db and the three wrapper
+# scripts (with their .sh suffix dropped), all with mode 755.
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 xfs_admin.sh $(XFS_CMDS_BIN_DIR)/xfs_admin
+       $(INSTALL) -m 755 xfs_check.sh $(XFS_CMDS_BIN_DIR)/xfs_check
+       $(INSTALL) -m 755 xfs_ncheck.sh $(XFS_CMDS_BIN_DIR)/xfs_ncheck
diff --git a/db/addr.c b/db/addr.c
new file mode 100644 (file)
index 0000000..cb69ecf
--- /dev/null
+++ b/db/addr.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "addr.h"
+#include "command.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "flist.h"
+#include "inode.h"
+#include "output.h"
+
+static int addr_f(int argc, char **argv);
+static void addr_help(void);
+
+/*
+ * Command table entry for "addr" (also given the short name "a"), handled
+ * by addr_f with addr_help as its help routine.
+ * NOTE(review): the meaning of the three numeric members is defined by
+ * cmdinfo_t in command.h and is not visible here.
+ */
+static const cmdinfo_t addr_cmd =
+       { "addr", "a", addr_f, 0, 1, 1, "[field-expression]",
+         "set current address", addr_help };
+
+/*
+ * Print the long help text for the "addr" command via dbprintf.
+ */
+static void
+addr_help(void)
+{
+       dbprintf(
+"\n"
+" 'addr' uses the given field to set the filesystem address and type\n"
+"\n"
+" Examples:\n"
+"\n"
+" sb\n"
+" a rootino - set the type to inode and set position to the root inode\n"
+" a u.bmx[0].startblock (for inode with blockmap)\n"
+"\n"
+);
+
+}
+
+/*
+ * Handler for the "addr" command.
+ *
+ * With no argument, print the current I/O cursor.  Otherwise argv[1] is
+ * scanned and parsed as a field expression against the fields of the
+ * current type, and the address function registered for the selected
+ * field's type is called with the current buffer, the field's offset and
+ * the field's next type.  Array ranges are rejected.
+ *
+ * Always returns 0; problems are reported through dbprintf.
+ */
+static int
+addr_f(
+       int             argc,
+       char            **argv)
+{
+       adfnc_t         adf;
+       const ftattr_t  *fa;
+       flist_t         *fl;
+       const field_t   *fld;
+       typnm_t         next;
+       flist_t         *tfl;
+
+       if (argc == 1) {
+               print_iocur("current", iocur_top);
+               return 0;
+       }
+       if (cur_typ == NULL) {
+               dbprintf("no current type\n");
+               return 0;
+       }
+       fld = cur_typ->fields;
+       /* An unnamed first field is a wrapper: use its type's subfields. */
+       if (fld != NULL && fld->name[0] == '\0') {
+               fa = &ftattrtab[fld->ftyp];
+               ASSERT(fa->ftyp == fld->ftyp);
+               fld = fa->subfld;
+       }
+       if (fld == NULL) {
+               dbprintf("no fields for type %s\n", cur_typ->name);
+               return 0;
+       }
+       fl = flist_scan(argv[1]);
+       if (fl == NULL)
+               return 0;
+       /*
+        * From here on fl must be released on every exit path, so all
+        * remaining exits go through the "out" label.
+        */
+       if (!flist_parse(fld, fl, iocur_top->data, 0))
+               goto out;
+       flist_print(fl);
+       /* Walk to the innermost element, refusing any array range. */
+       for (tfl = fl; tfl->child != NULL; tfl = tfl->child) {
+               if ((tfl->flags & FL_OKLOW) && tfl->low < tfl->high) {
+                       dbprintf("array not allowed for addr command\n");
+                       goto out;
+               }
+       }
+       fld = tfl->fld;
+       next = fld->next;
+       if (next == TYP_INODATA)
+               next = inode_next_type();
+       if (next == TYP_NONE) {
+               dbprintf("no next type for field %s\n", fld->name);
+               goto out;
+       }
+       fa = &ftattrtab[fld->ftyp];
+       ASSERT(fa->ftyp == fld->ftyp);
+       adf = fa->adfunc;
+       if (adf == NULL) {
+               dbprintf("no addr function for field %s (type %s)\n",
+                       fld->name, fa->name);
+               goto out;
+       }
+       (*adf)(iocur_top->data, tfl->offset, next);
+out:
+       flist_free(fl);
+       return 0;
+}
+
+/*
+ * Register the "addr" command by passing its table entry to add_command.
+ */
+void
+addr_init(void)
+{
+       add_command(&addr_cmd);
+}
diff --git a/db/addr.h b/db/addr.h
new file mode 100644 (file)
index 0000000..4b61e4d
--- /dev/null
+++ b/db/addr.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Registers the "addr" command; defined in addr.c. */
+extern void    addr_init(void);
diff --git a/db/agf.c b/db/agf.c
new file mode 100644 (file)
index 0000000..a9a5c44
--- /dev/null
+++ b/db/agf.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agf.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agf_f(int argc, char **argv);
+static void agf_help(void);
+
+/*
+ * Command table entry for "agf" (no short name), handled by agf_f with
+ * agf_help as its help routine.
+ * NOTE(review): the meaning of the three numeric members is defined by
+ * cmdinfo_t in command.h and is not visible here.
+ */
+static const cmdinfo_t agf_cmd =
+       { "agf", NULL, agf_f, 0, 1, 1, "[agno]",
+         "set address to agf header", agf_help };
+
+/*
+ * Top-level field list for the AGF type: a single unnamed field of type
+ * FLDT_AGF at offset 0, terminated by a NULL entry.
+ */
+const field_t  agf_hfld[] = {
+       { "", FLDT_AGF, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * OFF(f) and SZ(f) apply bitize()/bitszof() to member agf_<f> of
+ * xfs_agf_t -- presumably its offset and size in bits; confirm in bit.h.
+ */
+#define        OFF(f)  bitize(offsetof(xfs_agf_t, agf_ ## f))
+#define        SZ(f)   bitszof(xfs_agf_t, agf_ ## f)
+/*
+ * Field descriptions for the members of the AGF header, terminated by a
+ * NULL entry.  The "roots" and "levels" arrays are listed whole (flagged
+ * FLD_ARRAY|FLD_SKIPALL) and additionally as named single elements for the
+ * BNO and CNT btrees; the two root entries carry TYP_BNOBT and TYP_CNTBT
+ * as their next type.
+ */
+const field_t  agf_flds[] = {
+       { "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+       { "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+       { "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE },
+       { "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE },
+       { "roots", FLDT_AGBLOCK, OI(OFF(roots)), CI(XFS_BTNUM_AGF),
+         FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
+       { "bnoroot", FLDT_AGBLOCK,
+         OI(OFF(roots) + XFS_BTNUM_BNO * SZ(roots[XFS_BTNUM_BNO])), C1, 0,
+         TYP_BNOBT },
+       { "cntroot", FLDT_AGBLOCK,
+         OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
+         TYP_CNTBT },
+       { "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
+         FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
+       { "bnolevel", FLDT_UINT32D,
+         OI(OFF(levels) + XFS_BTNUM_BNO * SZ(levels[XFS_BTNUM_BNO])), C1, 0,
+         TYP_NONE },
+       { "cntlevel", FLDT_UINT32D,
+         OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0,
+         TYP_NONE },
+       { "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
+       { "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
+       { "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
+       { "freeblks", FLDT_EXTLEN, OI(OFF(freeblks)), C1, 0, TYP_NONE },
+       { "longest", FLDT_EXTLEN, OI(OFF(longest)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Print the long help text for the "agf" command via dbprintf.
+ */
+static void
+agf_help(void)
+{
+       dbprintf(
+"\n"
+" set allocation group free block list\n"
+"\n"
+" Example:\n"
+"\n"
+" agf 2 - move location to AGF in 2nd filesystem allocation group\n"
+"\n"
+" Located in the 2nd 512 byte block of each allocation group,\n"
+" the AGF contains the root of two different freespace btrees:\n"
+" The 'cnt' btree keeps track freespace indexed on section size.\n"
+" The 'bno' btree tracks sections of freespace indexed on block number.\n"
+);
+}
+
+/*
+ * Handler for the "agf" command: make the AGF header of an allocation
+ * group the current location.
+ *
+ * argv[1], when present, is the allocation group number (any base that
+ * strtoul accepts with base 0).  It must be a complete, non-empty number
+ * below the superblock's sb_agcount.  Without an argument the current
+ * allocation group is used, defaulting to 0 when none is set.
+ *
+ * Always returns 0; a bad argument is reported through dbprintf and leaves
+ * the current location unchanged.
+ */
+static int
+agf_f(
+       int             argc,
+       char            **argv)
+{
+       unsigned long   val;
+       char            *p;
+
+       if (argc > 1) {
+               val = strtoul(argv[1], &p, 0);
+               /*
+                * Range-check the full strtoul result before narrowing it,
+                * so an oversized value cannot wrap into a valid group
+                * number; p == argv[1] rejects an argument with no digits.
+                */
+               if (p == argv[1] || *p != '\0' ||
+                   val >= mp->m_sb.sb_agcount) {
+                       dbprintf("bad allocation group number %s\n", argv[1]);
+                       return 0;
+               }
+               cur_agno = (xfs_agnumber_t)val;
+       } else if (cur_agno == NULLAGNUMBER)
+               cur_agno = 0;
+       ASSERT(typtab[TYP_AGF].typnm == TYP_AGF);
+       set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, cur_agno, XFS_AGF_DADDR), 1,
+               DB_RING_ADD, NULL);
+       return 0;
+}
+
+/*
+ * Register the "agf" command by passing its table entry to add_command.
+ */
+void
+agf_init(void)
+{
+       add_command(&agf_cmd);
+}
+
+/*
+ * Size function for the AGF type: returns bitize() applied to the
+ * superblock sector size.  None of the arguments are used.
+ */
+/*ARGSUSED*/
+int
+agf_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agf.h b/db/agf.h
new file mode 100644 (file)
index 0000000..26ce849
--- /dev/null
+++ b/db/agf.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration; only used here to declare the field tables. */
+struct field;
+
+/* Field tables for the AGF header, defined in agf.c. */
+extern const struct field      agf_flds[];
+extern const struct field      agf_hfld[];
+
+/* agf_init registers the "agf" command; agf_size is the AGF size function. */
+extern void    agf_init(void);
+extern int     agf_size(void *obj, int startoff, int idx);
diff --git a/db/agfl.c b/db/agfl.c
new file mode 100644 (file)
index 0000000..3d7f0a0
--- /dev/null
+++ b/db/agfl.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agfl.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agfl_f(int argc, char **argv);
+static void agfl_help(void);
+
+/*
+ * Command table entry for "agfl" (no short name), handled by agfl_f with
+ * agfl_help as its help routine.
+ * NOTE(review): the meaning of the three numeric members is defined by
+ * cmdinfo_t in command.h and is not visible here.
+ */
+static const cmdinfo_t agfl_cmd =
+       { "agfl", NULL, agfl_f, 0, 1, 1, "[agno]", 
+         "set address to agfl block", agfl_help };
+
+/*
+ * Top-level field list for the AGFL type: a single unnamed field of type
+ * FLDT_AGFL at offset 0, terminated by a NULL entry.
+ */
+const field_t  agfl_hfld[] = {
+       { "", FLDT_AGFL, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * OFF(f) applies bitize() to the offset of member agfl_<f> of xfs_agfl_t
+ * -- presumably its offset in bits; confirm in bit.h.
+ */
+#define        OFF(f)  bitize(offsetof(xfs_agfl_t, agfl_ ## f))
+/*
+ * Field descriptions for the AGFL block: a single array "bno" with
+ * XFS_AGFL_SIZE entries whose next type is TYP_DATA, terminated by a NULL
+ * entry.
+ */
+const field_t  agfl_flds[] = {
+       { "bno", FLDT_AGBLOCKNZ, OI(OFF(bno)), CI(XFS_AGFL_SIZE), FLD_ARRAY,
+         TYP_DATA },
+       { NULL }
+};
+
+/*
+ * Print the long help text for the "agfl" command via dbprintf.
+ * The example line ends in its own newline so that it is followed by a
+ * blank line, matching the layout of the "agf" help text.
+ */
+static void
+agfl_help(void)
+{
+       dbprintf(
+"\n"
+" set allocation group freelist\n"
+"\n"
+" Example:\n"
+"\n"
+" agfl 5\n"
+"\n"
+" Located in the 4th 512 byte block of each allocation group,\n"
+" the agfl freelist for internal btree space allocation is maintained\n"
+" for each allocation group.  This acts as a reserved pool of space\n"
+" separate from the general filesystem freespace (not used for user data).\n"
+"\n"
+);
+
+}
+
+/*
+ * Handler for the "agfl" command: make the AGFL block of an allocation
+ * group the current location.
+ *
+ * argv[1], when present, is the allocation group number (any base that
+ * strtoul accepts with base 0).  It must be a complete, non-empty number
+ * below the superblock's sb_agcount.  Without an argument the current
+ * allocation group is used, defaulting to 0 when none is set.
+ *
+ * Always returns 0; a bad argument is reported through dbprintf and leaves
+ * the current location unchanged.
+ */
+static int
+agfl_f(
+       int             argc,
+       char            **argv)
+{
+       unsigned long   val;
+       char            *p;
+
+       if (argc > 1) {
+               val = strtoul(argv[1], &p, 0);
+               /*
+                * Range-check the full strtoul result before narrowing it,
+                * so an oversized value cannot wrap into a valid group
+                * number; p == argv[1] rejects an argument with no digits.
+                */
+               if (p == argv[1] || *p != '\0' ||
+                   val >= mp->m_sb.sb_agcount) {
+                       dbprintf("bad allocation group number %s\n", argv[1]);
+                       return 0;
+               }
+               cur_agno = (xfs_agnumber_t)val;
+       } else if (cur_agno == NULLAGNUMBER)
+               cur_agno = 0;
+       ASSERT(typtab[TYP_AGFL].typnm == TYP_AGFL);
+       set_cur(&typtab[TYP_AGFL], XFS_AG_DADDR(mp, cur_agno, XFS_AGFL_DADDR),
+               1, DB_RING_ADD, NULL);
+       return 0;
+}
+
+/*
+ * Register the "agfl" command by passing its table entry to add_command.
+ */
+void
+agfl_init(void)
+{
+       add_command(&agfl_cmd);
+}
+
+/*
+ * Size function for the AGFL type: returns bitize() applied to the
+ * superblock sector size.  None of the arguments are used.
+ */
+/*ARGSUSED*/
+int
+agfl_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agfl.h b/db/agfl.h
new file mode 100644 (file)
index 0000000..37670c3
--- /dev/null
+++ b/db/agfl.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration; only used here to declare the field tables. */
+struct field;
+
+/* Field tables for the AGFL block, defined in agfl.c. */
+extern const struct field      agfl_flds[];
+extern const struct field      agfl_hfld[];
+
+/* agfl_init registers the "agfl" command; agfl_size is the AGFL size function. */
+extern void    agfl_init(void);
+extern int     agfl_size(void *obj, int startoff, int idx);
diff --git a/db/agi.c b/db/agi.c
new file mode 100644 (file)
index 0000000..f1c5651
--- /dev/null
+++ b/db/agi.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agi.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int agi_f(int argc, char **argv);       /* command handler, wired into agi_cmd below */
+static void agi_help(void);                    /* long help printer, wired into agi_cmd below */
+
+static const cmdinfo_t agi_cmd =               /* descriptor passed to add_command() by agi_init() */
+       { "agi", NULL, agi_f, 0, 1, 1, "[agno]",
+         "set address to agi header", agi_help };
+
+const field_t  agi_hfld[] = {          /* top-level field table for the agi type */
+       { "", FLDT_AGI, OI(0), C1, 0, TYP_NONE },       /* one unnamed FLDT_AGI entry at OI(0) */
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        OFF(f)  bitize(offsetof(xfs_agi_t, agi_ ## f))  /* bitize() of the offset of member agi_<f> in xfs_agi_t */
+const field_t  agi_flds[] = {          /* one entry per xfs_agi_t member listed below */
+       { "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+       { "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+       { "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE },
+       { "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE },
+       { "count", FLDT_AGINO, OI(OFF(count)), C1, 0, TYP_NONE },
+       { "root", FLDT_AGBLOCK, OI(OFF(root)), C1, 0, TYP_INOBT },
+       { "level", FLDT_UINT32D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "freecount", FLDT_AGINO, OI(OFF(freecount)), C1, 0, TYP_NONE },
+       { "newino", FLDT_AGINO, OI(OFF(newino)), C1, 0, TYP_INODE },
+       { "dirino", FLDT_AGINO, OI(OFF(dirino)), C1, 0, TYP_INODE },
+       { "unlinked", FLDT_AGINONN, OI(OFF(unlinked)),  /* the only FLD_ARRAY entry in this table */
+         CI(XFS_AGI_UNLINKED_BUCKETS), FLD_ARRAY, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+static void            /* long help for the "agi" command; referenced from agi_cmd */
+agi_help(void)
+{
+       dbprintf(       /* NOTE(review): text says "inode btree" while agi_cmd's summary says "agi header" -- confirm wording */
+"\n"
+" set allocation group inode btree\n"
+"\n"
+" Example:\n"
+"\n"
+" agi 3 (set location to 3rd allocation group inode btree and type to 'agi')\n"
+"\n"
+" Located in the 3rd 512 byte block of each allocation group,\n"
+" the agi inode btree tracks all used/free inodes in the allocation group.\n"
+" Inodes are allocated in 16k 'chunks', each btree entry tracks a 'chunk'.\n"
+"\n"
+);
+}
+
+/* "agi [agno]": make AG agno (default: the current AG, or 0 if none) current, then
+ * set_cur() to its AGI.  Empty, non-numeric or out-of-range agno is rejected; returns 0. */
+static int
+agi_f(int argc, char **argv)
+{
+       unsigned long   agno;   /* wide: casting before the range check would truncate */
+       char            *p;
+
+       if (argc > 1) {
+               agno = strtoul(argv[1], &p, 0);
+               if (p == argv[1] || *p != '\0' || agno >= mp->m_sb.sb_agcount) {
+                       dbprintf("bad allocation group number %s\n", argv[1]);
+                       return 0;
+               }
+               cur_agno = (xfs_agnumber_t)agno;
+       } else if (cur_agno == NULLAGNUMBER)
+               cur_agno = 0;
+       ASSERT(typtab[TYP_AGI].typnm == TYP_AGI);
+       set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, cur_agno, XFS_AGI_DADDR), 1,
+               DB_RING_ADD, NULL);
+       return 0;
+}
+
+/* Register the "agi" command descriptor (agi_cmd) via add_command(). */
+void agi_init(void)
+{
+       add_command(&agi_cmd);
+}
+
+/* Size callback for agi: returns bitize() of the superblock sector size. */
+/*ARGSUSED*/
+int
+agi_size(void *obj, int startoff, int idx)
+{
+       /* obj, startoff and idx are intentionally unused */
+
+       return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/agi.h b/db/agi.h
new file mode 100644 (file)
index 0000000..6d3881d
--- /dev/null
+++ b/db/agi.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;  /* forward declaration only; this header includes nothing itself */
+
+extern const struct field      agi_flds[];     /* defined in agi.c: per-member table for xfs_agi_t */
+extern const struct field      agi_hfld[];     /* defined in agi.c: single FLDT_AGI entry */
+
+extern void    agi_init(void);         /* calls add_command() for the "agi" command */
+extern int     agi_size(void *obj, int startoff, int idx);     /* returns bitize(sector size); args unused */
diff --git a/db/attr.c b/db/attr.c
new file mode 100644 (file)
index 0000000..5828d0e
--- /dev/null
+++ b/db/attr.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "attr.h"
+#include "io.h"
+#include "data.h"
+#include "mount.h"
+
+static int     attr_leaf_entries_count(void *obj, int startoff);       /* *_count: used as count callbacks in the tables below */
+static int     attr_leaf_hdr_count(void *obj, int startoff);
+static int     attr_leaf_name_local_count(void *obj, int startoff);
+static int     attr_leaf_name_local_name_count(void *obj, int startoff);
+static int     attr_leaf_name_local_value_count(void *obj, int startoff);
+static int     attr_leaf_name_local_value_offset(void *obj, int startoff,      /* *_offset: used with FLD_OFFSET entries */
+                                                 int idx);
+static int     attr_leaf_name_remote_count(void *obj, int startoff);
+static int     attr_leaf_name_remote_name_count(void *obj, int startoff);
+static int     attr_leaf_nvlist_count(void *obj, int startoff);
+static int     attr_leaf_nvlist_offset(void *obj, int startoff, int idx);
+static int     attr_node_btree_count(void *obj, int startoff);
+static int     attr_node_hdr_count(void *obj, int startoff);
+
+const field_t  attr_hfld[] = {         /* top-level field table for the attr type */
+       { "", FLDT_ATTR, OI(0), C1, 0, TYP_NONE },      /* one unnamed FLDT_ATTR entry at OI(0) */
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        LOFF(f) bitize(offsetof(xfs_attr_leafblock_t, f))       /* offsets within a leaf block */
+#define        NOFF(f) bitize(offsetof(xfs_da_intnode_t, f))           /* offsets within a da node block */
+const field_t  attr_flds[] = {         /* leaf and node views coexist; each count callback checks the block magic */
+       { "hdr", FLDT_ATTR_LEAF_HDR, OI(LOFF(hdr)), attr_leaf_hdr_count,        /* count is 1 only for the leaf magic */
+         FLD_COUNT, TYP_NONE },
+       { "hdr", FLDT_ATTR_NODE_HDR, OI(NOFF(hdr)), attr_node_hdr_count,        /* count is 1 only for the node magic */
+         FLD_COUNT, TYP_NONE },
+       { "entries", FLDT_ATTR_LEAF_ENTRY, OI(LOFF(entries)),
+         attr_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "btree", FLDT_ATTR_NODE_ENTRY, OI(NOFF(btree)), attr_node_btree_count,
+         FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "nvlist", FLDT_ATTR_LEAF_NAME, attr_leaf_nvlist_offset,       /* offset from each entry's nameidx */
+         attr_leaf_nvlist_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        BOFF(f) bitize(offsetof(xfs_da_blkinfo_t, f))   /* offsets within xfs_da_blkinfo_t */
+const field_t  attr_blkinfo_flds[] = {
+       { "forw", FLDT_ATTRBLOCK, OI(BOFF(forw)), C1, 0, TYP_ATTR },
+       { "back", FLDT_ATTRBLOCK, OI(BOFF(back)), C1, 0, TYP_ATTR },
+       { "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE },
+       { "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },      /* padding member, flagged FLD_SKIPALL */
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        LEOFF(f)        bitize(offsetof(xfs_attr_leaf_entry_t, f))      /* offsets within xfs_attr_leaf_entry_t */
+const field_t  attr_leaf_entry_flds[] = {
+       { "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+       { "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE },
+       { "flags", FLDT_UINT8X, OI(LEOFF(flags)), C1, FLD_SKIPALL, TYP_NONE },  /* whole flags byte, flagged FLD_SKIPALL */
+       { "incomplete", FLDT_UINT1,     /* the next three entries are 1-bit views inside the flags byte */
+         OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_INCOMPLETE_BIT - 1), C1,
+         0, TYP_NONE },
+       { "root", FLDT_UINT1,
+         OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0,
+         TYP_NONE },
+       { "local", FLDT_UINT1,
+         OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_LOCAL_BIT - 1), C1, 0,
+         TYP_NONE },
+       { "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        LHOFF(f)        bitize(offsetof(xfs_attr_leaf_hdr_t, f))        /* offsets within xfs_attr_leaf_hdr_t */
+const field_t  attr_leaf_hdr_flds[] = {
+       { "info", FLDT_ATTR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
+       { "usedbytes", FLDT_UINT16D, OI(LHOFF(usedbytes)), C1, 0, TYP_NONE },
+       { "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE },
+       { "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE },
+       { "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE },
+       { "freemap", FLDT_ATTR_LEAF_MAP, OI(LHOFF(freemap)),    /* fixed-size array of XFS_ATTR_LEAF_MAPSIZE entries */
+         CI(XFS_ATTR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        LMOFF(f)        bitize(offsetof(xfs_attr_leaf_map_t, f))        /* offsets within xfs_attr_leaf_map_t */
+const field_t  attr_leaf_map_flds[] = {
+       { "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE },
+       { "size", FLDT_UINT16D, OI(LMOFF(size)), C1, 0, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        LNOFF(f)        bitize(offsetof(xfs_attr_leaf_name_local_t, f)) /* offsets within the local name layout */
+#define        LVOFF(f)        bitize(offsetof(xfs_attr_leaf_name_remote_t, f))        /* offsets within the remote name layout */
+const field_t  attr_leaf_name_flds[] = {       /* LNOFF entries use the *_local_* counts, LVOFF entries the *_remote_* counts */
+       { "valuelen", FLDT_UINT16D, OI(LNOFF(valuelen)),
+         attr_leaf_name_local_count, FLD_COUNT, TYP_NONE },
+       { "namelen", FLDT_UINT8D, OI(LNOFF(namelen)),
+         attr_leaf_name_local_count, FLD_COUNT, TYP_NONE },
+       { "name", FLDT_CHARNS, OI(LNOFF(nameval)),
+         attr_leaf_name_local_name_count, FLD_COUNT, TYP_NONE },
+       { "value", FLDT_CHARNS, attr_leaf_name_local_value_offset,      /* offset computed at run time from namelen */
+         attr_leaf_name_local_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "valueblk", FLDT_UINT32X, OI(LVOFF(valueblk)),
+         attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+       { "valuelen", FLDT_UINT32D, OI(LVOFF(valuelen)),
+         attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+       { "namelen", FLDT_UINT8D, OI(LVOFF(namelen)),
+         attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE },
+       { "name", FLDT_CHARNS, OI(LVOFF(name)),
+         attr_leaf_name_remote_name_count, FLD_COUNT, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        EOFF(f) bitize(offsetof(xfs_da_node_entry_t, f))        /* offsets within xfs_da_node_entry_t */
+const field_t  attr_node_entry_flds[] = {
+       { "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE },
+       { "before", FLDT_ATTRBLOCK, OI(EOFF(before)), C1, 0, TYP_ATTR },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        HOFF(f) bitize(offsetof(xfs_da_node_hdr_t, f))  /* offsets within xfs_da_node_hdr_t */
+const field_t  attr_node_hdr_flds[] = {
+       { "info", FLDT_ATTR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+/*
+ * Count callback for the "entries" array: hdr.count when obj carries the
+ * attr leaf magic, otherwise 0.  startoff is asserted to be 0.
+ */
+/*ARGSUSED*/
+static int
+attr_leaf_entries_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+       int                     is_leaf;
+
+       ASSERT(startoff == 0);
+       is_leaf = INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) ==
+                       XFS_ATTR_LEAF_MAGIC;
+
+       return is_leaf ? INT_GET(leaf->hdr.count, ARCH_CONVERT) : 0;
+}
+
+/* Count for the leaf "hdr" field: 1 if obj has the attr leaf magic, else 0. */
+/*ARGSUSED*/
+static int
+attr_leaf_hdr_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+
+       ASSERT(startoff == 0);
+
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       return 1;
+}
+
+/*
+ * Count for the local-format "valuelen"/"namelen" fields: 1 when the leaf
+ * entry whose nameidx equals this name's byte offset is XFS_ATTR_LOCAL, else 0.
+ */
+static int
+attr_leaf_name_local_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+       xfs_attr_leaf_entry_t   *ent;
+       int                     byteoff;
+       int                     n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+                       continue;
+               return (INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) ? 1 : 0;
+       }
+       return 0;
+}
+
+/*
+ * Count for the local-format "name" field: the namelen of the local name
+ * structure belonging to the leaf entry whose nameidx equals this name's
+ * byte offset; 0 for a non-leaf block, a non-local entry, or no match.
+ */
+static int
+attr_leaf_name_local_name_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t            *leaf = obj;
+       xfs_attr_leaf_name_local_t      *lname;
+       xfs_attr_leaf_entry_t           *ent;
+       int                             byteoff;
+       int                             n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+                       continue;
+               if (!(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL))
+                       return 0;
+               lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+               return INT_GET(lname->namelen, ARCH_CONVERT);
+       }
+       return 0;
+}
+
+/*
+ * Count for the local-format "value" field: the valuelen of the local name
+ * structure belonging to the leaf entry whose nameidx equals this name's
+ * byte offset; 0 for a non-leaf block, a non-local entry, or no match.
+ */
+static int
+attr_leaf_name_local_value_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t            *leaf = obj;
+       xfs_attr_leaf_name_local_t      *lname;
+       xfs_attr_leaf_entry_t           *ent;
+       int                             byteoff;
+       int                             n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+                       continue;
+               if (!(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL))
+                       return 0;
+               lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+               return INT_GET(lname->valuelen, ARCH_CONVERT);
+       }
+       return 0;
+}
+
+/*
+ * Offset callback for the local-format "value" field.  Locates the leaf
+ * entry whose nameidx equals this name's byte offset and returns bitize()
+ * of the distance from the start of its local name structure to
+ * &nameval[namelen], i.e. just past the name bytes.  Returns 0 for a
+ * non-leaf block or when no entry matches.  idx is not used.
+ */
+/*ARGSUSED*/
+static int
+attr_leaf_name_local_value_offset(void *obj, int startoff, int idx)
+{
+       xfs_attr_leafblock_t            *leaf = obj;
+       xfs_attr_leaf_name_local_t      *lname;
+       char                            *value;
+       int                             byteoff;
+       int                             n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       n = 0;
+       while (n < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+              INT_GET(leaf->entries[n].nameidx, ARCH_CONVERT) != byteoff)
+               n++;
+       if (n >= INT_GET(leaf->hdr.count, ARCH_CONVERT))
+               return 0;
+
+       lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, n);
+       value = (char *)&lname->nameval[lname->namelen];
+       return (int)bitize(value - (char *)lname);
+}
+
+/*
+ * Count for the remote-format name fields: 1 when the leaf entry whose nameidx
+ * equals this name's byte offset does not have XFS_ATTR_LOCAL set, else 0.
+ */
+static int
+attr_leaf_name_remote_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+       xfs_attr_leaf_entry_t   *ent;
+       int                     byteoff;
+       int                     n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) == byteoff)
+                       return !(INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL);
+       }
+       return 0;
+}
+
+/*
+ * Count for the remote-format "name" field: the namelen of the remote name
+ * structure belonging to the leaf entry whose nameidx equals this name's
+ * byte offset; 0 for a non-leaf block, a local entry, or no match.
+ */
+static int
+attr_leaf_name_remote_name_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t            *leaf = obj;
+       xfs_attr_leaf_name_remote_t     *rname;
+       xfs_attr_leaf_entry_t           *ent;
+       int                             byteoff;
+       int                             n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+       for (n = 0; n < INT_GET(leaf->hdr.count, ARCH_CONVERT); n++) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) != byteoff)
+                       continue;
+               if (INT_GET(ent->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL)
+                       return 0;
+               rname = XFS_ATTR_LEAF_NAME_REMOTE(leaf, n);
+               return INT_GET(rname->namelen, ARCH_CONVERT);
+       }
+       return 0;
+}
+
+/*
+ * Size of the name/value structure for leaf entry idx: bitize() of the
+ * local or remote entry-size macro, chosen by the entry's XFS_ATTR_LOCAL
+ * flag.  Returns 0 when obj lacks the attr leaf magic.
+ */
+/*ARGSUSED*/
+int
+attr_leaf_name_size(void *obj, int startoff, int idx)
+{
+       xfs_attr_leafblock_t            *leaf = obj;
+       xfs_attr_leaf_name_local_t      *lname;
+       xfs_attr_leaf_name_remote_t     *rname;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC)
+               return 0;
+
+       if (!(INT_GET(leaf->entries[idx].flags, ARCH_CONVERT) & XFS_ATTR_LOCAL)) {
+               rname = XFS_ATTR_LEAF_NAME_REMOTE(leaf, idx);
+               return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_REMOTE(
+                                       INT_GET(rname->namelen, ARCH_CONVERT)));
+       }
+       lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, idx);
+       return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_LOCAL(
+                               INT_GET(lname->namelen, ARCH_CONVERT),
+                               INT_GET(lname->valuelen, ARCH_CONVERT)));
+}
+
+/*
+ * Count callback for the "nvlist" array: hdr.count when obj carries the
+ * attr leaf magic, otherwise 0.  startoff is asserted to be 0.
+ */
+/*ARGSUSED*/
+static int
+attr_leaf_nvlist_count(void *obj, int startoff)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC)
+               return INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Offset callback for "nvlist" element idx: bitize() of that leaf entry's
+ * nameidx.  Unlike the count callbacks, no magic check is made here.
+ */
+/*ARGSUSED*/
+static int
+attr_leaf_nvlist_offset(void *obj, int startoff, int idx)
+{
+       xfs_attr_leafblock_t    *leaf = obj;
+       xfs_attr_leaf_entry_t   *ent;
+
+       ASSERT(startoff == 0);
+       ent = &leaf->entries[idx];
+       return bitize(INT_GET(ent->nameidx, ARCH_CONVERT));
+}
+
+/*
+ * Count callback for the "btree" array: hdr.count when obj carries the
+ * da node magic, otherwise 0.  startoff must be 0 (base structure).
+ */
+/*ARGSUSED*/
+static int
+attr_node_btree_count(void *obj, int startoff)
+{
+       xfs_da_intnode_t        *node = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+               return INT_GET(node->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/* Count for the node "hdr" field: 1 if obj has the da node magic, else 0. */
+/*ARGSUSED*/
+static int
+attr_node_hdr_count(void *obj, int startoff)
+{
+       xfs_da_intnode_t        *node = obj;
+
+       ASSERT(startoff == 0);
+
+       if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)
+               return 0;
+       return 1;
+}
+
+/* Size callback for an attr block: bitize() of the superblock block size. */
+/*ARGSUSED*/
+int
+attr_size(void *obj, int startoff, int idx)
+{
+       /* obj, startoff and idx are intentionally unused */
+
+       return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/attr.h b/db/attr.h
new file mode 100644 (file)
index 0000000..00ae7e9
--- /dev/null
+++ b/db/attr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern const field_t   attr_flds[];    /* field_t is not declared in this header: presumably field.h must come first */
+extern const field_t   attr_hfld[];
+extern const field_t   attr_blkinfo_flds[];
+extern const field_t   attr_leaf_entry_flds[];
+extern const field_t   attr_leaf_hdr_flds[];
+extern const field_t   attr_leaf_map_flds[];
+extern const field_t   attr_leaf_name_flds[];
+extern const field_t   attr_node_entry_flds[];
+extern const field_t   attr_node_hdr_flds[];   /* all nine tables above are defined in attr.c */
+
+extern int     attr_leaf_name_size(void *obj, int startoff, int idx);  /* bit size of leaf entry idx's name/value structure */
+extern int     attr_size(void *obj, int startoff, int idx);            /* bitize(block size); args unused */
diff --git a/db/attrshort.c b/db/attrshort.c
new file mode 100644 (file)
index 0000000..04477fe
--- /dev/null
+++ b/db/attrshort.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "attrshort.h"
+
+static int     attr_sf_entry_name_count(void *obj, int startoff);      /* *_count: used as count callbacks in the tables below */
+static int     attr_sf_entry_value_count(void *obj, int startoff);
+static int     attr_sf_entry_value_offset(void *obj, int startoff, int idx);   /* *_offset: used with FLD_OFFSET entries */
+static int     attr_shortform_list_count(void *obj, int startoff);
+static int     attr_shortform_list_offset(void *obj, int startoff, int idx);
+
+#define        OFF(f)  bitize(offsetof(xfs_attr_shortform_t, f))       /* offsets within xfs_attr_shortform_t */
+const field_t  attr_shortform_flds[] = {
+       { "hdr", FLDT_ATTR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+       { "list", FLDT_ATTR_SF_ENTRY, attr_shortform_list_offset,       /* entries are found by walking with XFS_ATTR_SF_NEXTENTRY */
+         attr_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        HOFF(f) bitize(offsetof(xfs_attr_sf_hdr_t, f))  /* offsets within xfs_attr_sf_hdr_t */
+const field_t  attr_sf_hdr_flds[] = {
+       { "totsize", FLDT_UINT16D, OI(HOFF(totsize)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+#define        EOFF(f) bitize(offsetof(xfs_attr_sf_entry_t, f))        /* offsets within xfs_attr_sf_entry_t */
+const field_t  attr_sf_entry_flds[] = {
+       { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+       { "valuelen", FLDT_UINT8D, OI(EOFF(valuelen)), C1, 0, TYP_NONE },
+       { "flags", FLDT_UINT8X, OI(EOFF(flags)), C1, FLD_SKIPALL, TYP_NONE },   /* whole flags byte, flagged FLD_SKIPALL */
+       { "root", FLDT_UINT1,   /* 1-bit view inside the flags byte */
+         OI(EOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0,
+         TYP_NONE },
+       { "name", FLDT_CHARNS, OI(EOFF(nameval)), attr_sf_entry_name_count,     /* length taken from namelen */
+         FLD_COUNT, TYP_NONE },
+       { "value", FLDT_CHARNS, attr_sf_entry_value_offset,     /* starts at &nameval[namelen]; length from valuelen */
+         attr_sf_entry_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { NULL }                                        /* NULL entry closes the table */
+};
+
+/* Count for the shortform "name" field: namelen of the entry at startoff. */
+static int
+attr_sf_entry_name_count(void *obj, int startoff)
+{
+       char                    *base = obj;
+       xfs_attr_sf_entry_t     *ent;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ent = (xfs_attr_sf_entry_t *)(base + byteize(startoff));
+       return ent->namelen;
+}
+
+/*
+ * Size of shortform entry idx: steps idx times from list[0] with
+ * XFS_ATTR_SF_NEXTENTRY, then returns bitize() of XFS_ATTR_SF_ENTSIZE.
+ */
+int
+attr_sf_entry_size(void *obj, int startoff, int idx)
+{
+       xfs_attr_shortform_t    *sf;
+       xfs_attr_sf_entry_t     *ent;
+       int                     left;
+
+       ASSERT(bitoffs(startoff) == 0);
+       sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+       for (ent = &sf->list[0], left = idx; left > 0; left--)
+               ent = XFS_ATTR_SF_NEXTENTRY(ent);
+       return bitize((int)XFS_ATTR_SF_ENTSIZE(ent));
+}
+
+/* Count for the shortform "value" field: valuelen of the entry at startoff. */
+static int
+attr_sf_entry_value_count(void *obj, int startoff)
+{
+       char                    *base = obj;
+       xfs_attr_sf_entry_t     *ent;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ent = (xfs_attr_sf_entry_t *)(base + byteize(startoff));
+       return ent->valuelen;
+}
+
+/*
+ * Offset of "value" in a shortform entry: bitize(&nameval[namelen] - entry).
+ */
+/*ARGSUSED*/
+static int
+attr_sf_entry_value_offset(void *obj, int startoff, int idx)
+{
+       xfs_attr_sf_entry_t     *ent;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       ent = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff));
+       return bitize((int)((char *)&ent->nameval[ent->namelen] - (char *)ent));
+}
+
+/* Count for the shortform "list" array: hdr.count of the header at startoff. */
+static int
+attr_shortform_list_count(void *obj, int startoff)
+{
+       char                    *base = obj;
+       xfs_attr_shortform_t    *sf;
+
+       ASSERT(bitoffs(startoff) == 0);
+       sf = (xfs_attr_shortform_t *)(base + byteize(startoff));
+       return sf->hdr.count;
+}
+
+/*
+ * Offset of shortform entry idx: steps idx times from list[0] with
+ * XFS_ATTR_SF_NEXTENTRY and returns bitize() of its distance from sf.
+ */
+static int
+attr_shortform_list_offset(void *obj, int startoff, int idx)
+{
+       xfs_attr_shortform_t    *sf;
+       xfs_attr_sf_entry_t     *ent;
+       int                     left;
+
+       ASSERT(bitoffs(startoff) == 0);
+       sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+       for (ent = &sf->list[0], left = idx; left > 0; left--)
+               ent = XFS_ATTR_SF_NEXTENTRY(ent);
+       return bitize((int)((char *)ent - (char *)sf));
+}
+
+/*
+ * Size of the whole shortform area: steps over hdr.count entries from list[0]
+ * and returns bitize() of the distance from sf to where the walk ends.
+ */
+/*ARGSUSED*/
+int
+attrshort_size(void *obj, int startoff, int idx)
+{
+       xfs_attr_shortform_t    *sf;
+       xfs_attr_sf_entry_t     *ent;
+       int                     left;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff));
+       for (ent = &sf->list[0], left = sf->hdr.count; left > 0; left--)
+               ent = XFS_ATTR_SF_NEXTENTRY(ent);
+       return bitize((int)((char *)ent - (char *)sf));
+}
diff --git a/db/attrshort.h b/db/attrshort.h
new file mode 100644 (file)
index 0000000..95c25b6
--- /dev/null
+++ b/db/attrshort.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Field tables for the shortform attribute structures. */
+extern const field_t   attr_sf_entry_flds[];
+extern const field_t   attr_sf_hdr_flds[];
+extern const field_t   attr_shortform_flds[];
+extern const field_t   attrshort_hfld[];
+
+/*
+ * Size callbacks; both take an object pointer, a bit offset into it and
+ * an index.  attrshort_size returns the size in bits of the whole
+ * shortform attribute structure.
+ * NOTE(review): attr_sf_entry_size presumably returns the bit size of
+ * one entry -- its definition is not visible here, confirm.
+ */
+extern int     attr_sf_entry_size(void *obj, int startoff, int idx);
+extern int     attrshort_size(void *obj, int startoff, int idx);
diff --git a/db/bit.c b/db/bit.c
new file mode 100644 (file)
index 0000000..de33378
--- /dev/null
+++ b/db/bit.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+
+#undef setbit  /* defined in param.h on Linux */
+
+static int     getbit(char *ptr, int bit);
+static void    setbit(char *ptr, int bit, int val);
+
+/*
+ * Return (as 0 or 1) the bit that lies "bit" bits past ptr.  Within a
+ * byte, bit 0 is the most significant bit and bit 7 the least.
+ */
+static int
+getbit(
+       char    *ptr,
+       int     bit)
+{
+       int     shift = 7 - bitoffs(bit);
+
+       return (ptr[byteize(bit)] & (1 << shift)) >> shift;
+}
+
+/*
+ * Set (val non-zero) or clear (val zero) the bit that lies "bit" bits
+ * past ptr.  Bit numbering matches getbit: bit 0 of a byte is its most
+ * significant bit.
+ */
+static void
+setbit(
+       char *ptr,
+       int  bit,
+       int  val)
+{
+       char    *byte = ptr + byteize(bit);
+       int     mask = 1 << (7 - bitoffs(bit));
+
+       if (val)
+               *byte |= mask;
+       else
+               *byte &= ~mask;
+}
+
+/*
+ * Read an nbits-wide integer that starts bitoff bits into obj.
+ *
+ * Bits are numbered from the most significant bit of each byte (see
+ * getbit), so the field is interpreted most-significant-bit first.
+ * When flags contains BVSIGNED the result is sign extended from the
+ * field's first bit; otherwise it is zero extended.  nbits must not
+ * exceed 64 (asserted).
+ */
+__int64_t
+getbitval(
+       void            *obj,
+       int             bitoff,
+       int             nbits,
+       int             flags)
+{
+       int             bit;
+       int             i;
+       char            *p;
+       __int64_t       rval;
+       int             signext;
+       int             z1, z2, z3, z4;
+
+       ASSERT(nbits<=64);
+
+       p = (char *)obj + byteize(bitoff);
+       bit = bitoffs(bitoff);
+       signext = (flags & BVSIGNED) != 0;
+
+       /*
+        * Fast paths: a whole 64/32/16/8-bit field that starts on a byte
+        * boundary at a suitably aligned address is loaded directly
+        * through INT_GET.  The alignment masks are stricter than the
+        * type sizes require (e.g. 16 bytes for a 64-bit load); a field
+        * that fails the test just takes the bit-by-bit path below.
+        * NOTE(review): INT_GET(..., ARCH_CONVERT) presumably converts
+        * from on-disk byte order -- confirm in libxfs.
+        */
+       z4 = ((__psint_t)p & 0xf) == 0 && bit == 0;
+       if (nbits == 64 && z4) {
+               if (signext)
+                       return (__int64_t)INT_GET(*(__int64_t *)p, ARCH_CONVERT);
+               else
+                       return (__int64_t)INT_GET(*(__uint64_t *)p, ARCH_CONVERT);
+       }
+       z3 = ((__psint_t)p & 0x7) == 0 && bit == 0;
+       if (nbits == 32 && z3) {
+               if (signext)
+                       return (__int64_t)INT_GET(*(__int32_t *)p, ARCH_CONVERT);
+               else
+                       return (__int64_t)INT_GET(*(__uint32_t *)p, ARCH_CONVERT);
+       }
+       z2 = ((__psint_t)p & 0x3) == 0 && bit == 0;
+       if (nbits == 16 && z2) {
+               if (signext)
+                       return (__int64_t)INT_GET(*(__int16_t *)p, ARCH_CONVERT);
+               else
+                       return (__int64_t)INT_GET(*(__uint16_t *)p, ARCH_CONVERT);
+       }
+       z1 = ((__psint_t)p & 0x1) == 0 && bit == 0;
+       if (nbits == 8 && z1) {
+               if (signext)
+                       return (__int64_t)INT_GET(*(__int8_t *)p, ARCH_CONVERT);
+               else
+                       return (__int64_t)INT_GET(*(__uint8_t *)p, ARCH_CONVERT);
+       }
+
+       /*
+        * General path: assemble the value one bit at a time.  Field
+        * bit i carries weight 2^(nbits - i - 1) and field bit 0 is the
+        * sign bit.  Because getbit addresses bits by position in
+        * memory, this interpretation does not depend on the host byte
+        * order, so no endian-specific variant is needed.
+        *
+        * The shifts are done on unsigned operands: left-shifting a
+        * negative value, or shifting a one into the sign bit, is
+        * undefined for signed types.
+        */
+       for (i = 0, rval = 0LL; i < nbits; i++) {
+               if (!getbit(p, bit + i))
+                       continue;
+               /* sign bit set: turn on every bit above the field */
+               if (i == 0 && signext && nbits < 64)
+                       rval = (__int64_t)(~0ULL << nbits);
+               rval |= (__int64_t)(1ULL << (nbits - i - 1));
+       }
+       return rval;
+}
+
+/*
+ * Write nbits bits taken from the start of ibuf into obuf, beginning
+ * bitoff bits into obuf.
+ *
+ * No type information is available here, so 2, 4 and 8 byte values
+ * (nbits of 16, 32 or 64) are assumed to be integers: on a
+ * little-endian host their bytes are written in reverse order.
+ * Everything else is copied unchanged.
+ */
+void
+setbitval(
+       void *obuf,      /* buffer to write into */
+       int bitoff,      /* bit offset of where to write */
+       int nbits,       /* number of bits to write */
+       void *ibuf)      /* source bits */
+{
+       char    *in = (char *)ibuf;
+       char    *out = (char *)obuf;
+       int     bit;
+#if BYTE_ORDER == LITTLE_ENDIAN
+       int     big = 0;
+#else
+       int     big = 1;
+#endif
+
+       /*
+        * only need to swap LE integers; nbits counts bits, so the
+        * 2/4/8 byte integer widths are 16/32/64 here
+        */
+       if (big || (nbits != 16 && nbits != 32 && nbits != 64)) {
+               /*
+                * A byte copy is only exact when both the destination
+                * offset and the length are whole bytes; otherwise
+                * byteize(nbits) would drop the trailing bits.
+                */
+               if (bitoff % NBBY || nbits % NBBY) {
+                       /* no - bit copy */
+                       for (bit = 0; bit < nbits; bit++)
+                               setbit(out, bit + bitoff, getbit(in, bit));
+               } else {
+                       /* yes - byte copy */
+                       memcpy(out + byteize(bitoff), in, byteize(nbits));
+               }
+       } else {
+               int     ibit;
+               int     obit;
+
+               /* we need to endian swap this value */
+               out += byteize(bitoff);
+               obit = bitoffs(bitoff);
+
+               /*
+                * Start at the first bit of the last source byte, copy
+                * that byte's eight bits in order, then step back to
+                * the first bit of the preceding byte.
+                */
+               ibit = nbits - NBBY;
+               for (bit = 0; bit < nbits; bit++) {
+                       setbit(out, bit + obit, getbit(in, ibit));
+                       if (ibit % NBBY == NBBY - 1)
+                               ibit -= NBBY * 2 - 1;
+                       else
+                               ibit++;
+               }
+       }
+}
diff --git a/db/bit.h b/db/bit.h
new file mode 100644 (file)
index 0000000..861755a
--- /dev/null
+++ b/db/bit.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Conversions between byte counts and bit counts/offsets.
+ *   bitize(s)    - s bytes expressed in bits
+ *   bitsz(t)     - sizeof(t) expressed in bits
+ *   bitszof(x,y) - szof(x,y) expressed in bits (szof is defined elsewhere)
+ *   byteize(s)   - number of whole bytes in s bits
+ *   bitoffs(s)   - remainder of s bits within a byte
+ */
+#define        bitize(s)       ((s) * NBBY)
+#define        bitsz(t)        bitize(sizeof(t))
+#define        bitszof(x,y)    bitize(szof(x,y))
+#define        byteize(s)      ((s) / NBBY)
+#define        bitoffs(s)      ((s) % NBBY)
+
+/* Values for the flags argument of getbitval(). */
+#define        BVUNSIGNED      0
+#define        BVSIGNED        1
+
+/* Read / write an nbits-wide field located bitoff bits into a buffer. */
+extern __int64_t       getbitval(void *obj, int bitoff, int nbits, int flags);
+extern void             setbitval(void *obuf, int bitoff, int nbits, void *ibuf);
diff --git a/db/block.c b/db/block.c
new file mode 100644 (file)
index 0000000..3196226
--- /dev/null
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "block.h"
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "output.h"
+#include "mount.h"
+
+/* Command handlers, their help printers and the raw dump helper. */
+static int     ablock_f(int argc, char **argv);
+static void     ablock_help(void);
+static int     daddr_f(int argc, char **argv);
+static void     daddr_help(void);
+static int     dblock_f(int argc, char **argv);
+static void     dblock_help(void);
+static int     fsblock_f(int argc, char **argv);
+static void     fsblock_help(void);
+static void    print_rawdata(void *data, int len);
+
+/*
+ * Command table entries registered by block_init() via add_command().
+ * NOTE(review): the fields appear to be name, alias, handler, three
+ * integers (presumably min args, max args and a flag), argument
+ * synopsis, one-line description and help routine -- confirm against
+ * cmdinfo_t in command.h.
+ */
+static const cmdinfo_t ablock_cmd =
+       { "ablock", NULL, ablock_f, 1, 1, 1, "filoff",
+         "set address to file offset (attr fork)", ablock_help };
+static const cmdinfo_t daddr_cmd =
+       { "daddr", NULL, daddr_f, 0, 1, 1, "[d]", 
+         "set address to daddr value", daddr_help };
+static const cmdinfo_t dblock_cmd =
+       { "dblock", NULL, dblock_f, 1, 1, 1, "filoff",
+         "set address to file offset (data fork)", dblock_help };
+static const cmdinfo_t fsblock_cmd =
+       { "fsblock", "fsb", fsblock_f, 0, 1, 1, "[fsb]",
+         "set address to fsblock value", fsblock_help };
+
+/* Print the help text for the "ablock" command. */
+static void 
+ablock_help(void)
+{
+       dbprintf(
+"\n Example:\n"
+"\n"
+" 'ablock 23' - sets the file position to the 23rd filesystem block in\n"
+" the inode's attribute fork.  The filesystem block size is specified in\n"
+" the superblock.\n\n"
+);
+}
+
+/*
+ * "ablock filoff": move the I/O cursor to the filesystem block that
+ * backs file block filoff of the current inode's attribute fork.
+ * Reports an error and leaves the cursor alone when the argument is
+ * not a number, the inode has no attribute fork, or the block is
+ * unmapped.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+ablock_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_dinode_t    *dip;
+       char            *end;
+       bmap_ext_t      ext;
+       xfs_dfiloff_t   fileoff;
+       xfs_dfsbno_t    fsb;
+       int             has_attr_fork;
+       int             nmapped = 1;
+
+       fileoff = (xfs_dfiloff_t)strtoull(argv[1], &end, 0);
+       if (*end != '\0') {
+               dbprintf("bad block number %s\n", argv[1]);
+               return 0;
+       }
+
+       /* peek at the inode on a pushed cursor to test for an attr fork */
+       push_cur();
+       set_cur_inode(iocur_top->ino);
+       dip = (xfs_dinode_t *)iocur_top->data;
+       has_attr_fork = XFS_DFORK_Q(dip);
+       pop_cur();
+       if (!has_attr_fork) {
+               dbprintf("no attribute data for file\n");
+               return 0;
+       }
+
+       /* look up the single extent covering the requested file block */
+       bmap(fileoff, 1, XFS_ATTR_FORK, &nmapped, &ext);
+       if (nmapped == 0) {
+               dbprintf("file attr block is unmapped\n");
+               return 0;
+       }
+
+       fsb = ext.startblock + (fileoff - ext.startoff);
+       ASSERT(typtab[TYP_ATTR].typnm == TYP_ATTR);
+       set_cur(&typtab[TYP_ATTR], (__int64_t)XFS_FSB_TO_DADDR(mp, fsb),
+               blkbb, DB_RING_ADD, NULL);
+       return 0;
+}
+
+/* Register the ablock, daddr, dblock and fsblock commands. */
+void
+block_init(void)
+{
+       add_command(&ablock_cmd);
+       add_command(&daddr_cmd);
+       add_command(&dblock_cmd);
+       add_command(&fsblock_cmd);
+}
+
+/* Print the help text for the "daddr" command. */
+static void 
+daddr_help(void)
+{
+       dbprintf(
+"\n Example:\n"
+"\n"
+" 'daddr 102' - sets position to the 102nd absolute disk block\n"
+" (512 byte block).\n"
+);
+}
+
+/*
+ * "daddr [d]": with no argument print the current cursor offset
+ * shifted down by BBSHIFT; otherwise move the cursor to disk address d.
+ * d is rejected when it is not a number or is not below
+ * sb_dblocks << (sb_blocklog - BBSHIFT).  Always returns 0.
+ */
+static int
+daddr_f(
+       int             argc,
+       char            **argv)
+{
+       int             bad;
+       __int64_t       daddr;
+       char            *end;
+
+       if (argc == 1) {
+               dbprintf("current daddr is %lld\n", iocur_top->off >> BBSHIFT);
+               return 0;
+       }
+
+       daddr = (__int64_t)strtoull(argv[1], &end, 0);
+       bad = *end != '\0';
+       if (!bad)
+               bad = daddr >= mp->m_sb.sb_dblocks <<
+                               (mp->m_sb.sb_blocklog - BBSHIFT);
+       if (bad) {
+               dbprintf("bad daddr %s\n", argv[1]);
+               return 0;
+       }
+
+       ASSERT(typtab[TYP_DATA].typnm == TYP_DATA);
+       set_cur(&typtab[TYP_DATA], daddr, 1, DB_RING_ADD, NULL);
+       return 0;
+}
+
+/* Print the help text for the "dblock" command. */
+static void 
+dblock_help(void)
+{
+       dbprintf(
+"\n Example:\n"
+"\n"
+" 'dblock 23' - sets the file position to the 23rd filesystem block in\n"
+" the inode's data fork.  The filesystem block size is specified in the\n"
+" superblock.\n\n"
+);
+}
+
+/*
+ * "dblock filoff": move the I/O cursor to the filesystem block(s) that
+ * back file block filoff of the current inode's data fork.
+ *
+ * The cursor type comes from inode_next_type().  For TYP_DIR2 a run of
+ * mp->m_dirblkfsbs filesystem blocks is mapped, otherwise a single
+ * block.  When the run spans more than one extent a bbmap describing
+ * the pieces is passed to set_cur().  Errors are reported with
+ * dbprintf and leave the cursor unchanged.  Always returns 0.
+ */
+static int
+dblock_f(
+       int             argc,
+       char            **argv)
+{
+       bbmap_t         bbmap;
+       bmap_ext_t      *bmp;
+       xfs_dfiloff_t   bno;
+       xfs_dfsbno_t    dfsbno;
+       int             nb;
+       int             nex;
+       char            *p;
+       typnm_t         type;
+
+       bno = (xfs_dfiloff_t)strtoull(argv[1], &p, 0);
+       if (*p != '\0') {
+               dbprintf("bad block number %s\n", argv[1]);
+               return 0;
+       }
+       /* inspect the inode on a pushed cursor to learn the data type */
+       push_cur();
+       set_cur_inode(iocur_top->ino);
+       type = inode_next_type();
+       pop_cur();
+       if (type == TYP_NONE) {
+               dbprintf("no type for file data\n");
+               return 0;
+       }
+       /* at most one extent per block can come back, so nb entries do */
+       nex = nb = type == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+       bmp = malloc(nb * sizeof(*bmp));
+       if (bmp == NULL) {
+               /* bmap() stores through bmp, so it must not be NULL */
+               dbprintf("cannot allocate extent map for %d blocks\n", nb);
+               return 0;
+       }
+       bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+       if (nex == 0) {
+               dbprintf("file data block is unmapped\n");
+               free(bmp);
+               return 0;
+       }
+       dfsbno = bmp->startblock + (bno - bmp->startoff);
+       ASSERT(typtab[type].typnm == type);
+       if (nex > 1)
+               make_bbmap(&bbmap, nex, bmp);
+       set_cur(&typtab[type], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno),
+               nb * blkbb, DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+       free(bmp);
+       return 0;
+}
+
+/* Print the help text for the "fsblock" command. */
+static void 
+fsblock_help(void)
+{
+       dbprintf(
+"\n Example:\n"
+"\n"
+" 'fsblock 1023' - sets the file position to the 1023rd filesystem block.\n"
+" The filesystem block size is specified in the superblock and set during\n"
+" mkfs time.  Offset is absolute (not AG relative).\n\n"
+);
+}
+
+/*
+ * "fsblock [fsb]": with no argument print the filesystem block of the
+ * current cursor position; otherwise move the cursor to filesystem
+ * block fsb.  fsb is rejected when it is not a number or when its AG
+ * number or AG-relative block lies outside the superblock's
+ * sb_agcount / sb_agblocks.  Always returns 0.
+ */
+static int
+fsblock_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agblock_t   agbno;
+       xfs_agnumber_t  agno;
+       char            *end;
+       xfs_dfsbno_t    fsb;
+
+       if (argc == 1) {
+               dbprintf("current fsblock is %lld\n",
+                       XFS_DADDR_TO_FSB(mp, iocur_top->off >> BBSHIFT));
+               return 0;
+       }
+
+       fsb = strtoull(argv[1], &end, 0);
+       if (*end != '\0')
+               goto bad;
+
+       /* split into AG number and block within the AG for the checks */
+       agno = XFS_FSB_TO_AGNO(mp, fsb);
+       agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+       if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks)
+               goto bad;
+
+       ASSERT(typtab[TYP_DATA].typnm == TYP_DATA);
+       set_cur(&typtab[TYP_DATA], XFS_AGB_TO_DADDR(mp, agno, agbno),
+               blkbb, DB_RING_ADD, NULL);
+       return 0;
+
+bad:
+       dbprintf("bad fsblock %s\n", argv[1]);
+       return 0;
+}
+
+/*
+ * Dump the buffer under the current I/O cursor as raw hex.  The
+ * fields, argc and argv arguments are not used.
+ */
+void
+print_block(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       print_rawdata(iocur_top->data, iocur_top->len);
+}
+
+/*
+ * Hex dump len bytes starting at data, 32 bytes per output line.  Each
+ * line starts with the hex offset of its first byte followed by ':',
+ * and the bytes are printed in groups of four separated by a space.
+ * The offset is printed with at least 1 to 4 hex digits, chosen from
+ * the offset of the last line.
+ */
+static void
+print_rawdata(
+       void    *data,
+       int     len)
+{
+       unsigned char   *bytes = data;
+       int             col;
+       int             lastaddr;
+       int             offchars;
+       int             row;
+
+       /* offset of the final output line decides the offset width */
+       lastaddr = (len - 1) & ~(32 - 1);
+       offchars = 4;
+       if (lastaddr < 0x1000)
+               offchars = 3;
+       if (lastaddr < 0x100)
+               offchars = 2;
+       if (lastaddr < 0x10)
+               offchars = 1;
+
+       for (row = 0; row < len; row += 32) {
+               dbprintf("%-0*.*x:", offchars, offchars, row);
+               for (col = 0; col < 32 && row + col < len; col++) {
+                       if (col % 4 == 0)
+                               dbprintf(" ");
+                       dbprintf("%02x", bytes[row + col]);
+               }
+               dbprintf("\n");
+       }
+}
diff --git a/db/block.h b/db/block.h
new file mode 100644 (file)
index 0000000..cf17f38
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration so this header does not require the field header. */
+struct field;
+
+/* Register the block addressing commands. */
+extern void    block_init(void);
+/* Hex dump the buffer under the current I/O cursor. */
+extern void    print_block(const struct field *fields, int argc, char **argv);
diff --git a/db/bmap.c b/db/bmap.c
new file mode 100644 (file)
index 0000000..69e2d3d
--- /dev/null
+++ b/db/bmap.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "bmap.h"
+#include "io.h"
+#include "inode.h"
+#include "output.h"
+#include "mount.h"
+
+/* Command handler and the helpers private to this file. */
+static int             bmap_f(int argc, char **argv);
+static int             bmap_one_extent(xfs_bmbt_rec_64_t *ep,
+                                       xfs_dfiloff_t *offp, xfs_dfiloff_t eoff,
+                                       int *idxp, bmap_ext_t *bep);
+static xfs_fsblock_t   select_child(xfs_dfiloff_t off, xfs_bmbt_key_t *kp,
+                                    xfs_bmbt_ptr_t *pp, int nrecs);
+
+/*
+ * Command table entry registered by bmap_init(); no help routine is
+ * supplied (last field is NULL).
+ */
+static const cmdinfo_t bmap_cmd =
+       { "bmap", NULL, bmap_f, 0, 3, 0, "[-ad] [block [len]]",
+         "show block map for current file", NULL };
+
+/*
+ * Map a range of file blocks of the current inode to extents.
+ *
+ * offset and len select the file block range; whichfork selects the
+ * data fork or the attribute fork.  On entry *nexp is the number of
+ * entries bep[] can hold (asserted > 0); on return *nexp is the number
+ * of entries filled in.  The fork must be in extents or btree format
+ * (asserted).  The I/O cursor is pushed on entry and popped again
+ * before returning.
+ */
+void
+bmap(
+       xfs_dfiloff_t           offset,
+       xfs_dfilblks_t          len,
+       int                     whichfork,
+       int                     *nexp,
+       bmap_ext_t              *bep)
+{
+       xfs_bmbt_block_t        *block;
+       xfs_fsblock_t           bno;
+       xfs_dfiloff_t           curoffset;
+       xfs_dinode_t            *dip;
+       xfs_dfiloff_t           eoffset;
+       xfs_bmbt_rec_64_t       *ep;
+       xfs_dinode_fmt_t        fmt;
+       int                     fsize;
+       xfs_bmbt_key_t          *kp;
+       int                     n;
+       int                     nex;
+       xfs_fsblock_t           nextbno;
+       int                     nextents;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmdr_block_t        *rblock;
+       typnm_t                 typ;
+       xfs_bmbt_rec_64_t       *xp;
+
+       /* work on the inode itself, on a pushed cursor */
+       push_cur();
+       set_cur_inode(iocur_top->ino);
+       nex = *nexp;
+       *nexp = 0;
+       ASSERT(nex > 0);
+       dip = iocur_top->data;
+       n = 0;
+       /* last file block of interest (inclusive) */
+       eoffset = offset + len - 1;
+       curoffset = offset;
+       fmt = (xfs_dinode_fmt_t)XFS_DFORK_FORMAT_ARCH(dip, whichfork, ARCH_CONVERT);
+       typ = whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA;
+       ASSERT(typtab[typ].typnm == typ);
+       ASSERT(fmt == XFS_DINODE_FMT_EXTENTS || fmt == XFS_DINODE_FMT_BTREE);
+       if (fmt == XFS_DINODE_FMT_EXTENTS) {
+               /* extent records live directly in the inode fork */
+               nextents = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+               xp = (xfs_bmbt_rec_64_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+               for (ep = xp; ep < &xp[nextents] && n < nex; ep++) {
+                       if (!bmap_one_extent(ep, &curoffset, eoffset, &n, bep))
+                               break;
+               }
+       } else {
+               /* btree format: the root block is held in the inode fork */
+               push_cur();
+               bno = NULLFSBLOCK;
+               rblock = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+               fsize = XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT);
+               pp = XFS_BTREE_PTR_ADDR(fsize, xfs_bmdr, rblock, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0));
+               kp = XFS_BTREE_KEY_ADDR(fsize, xfs_bmdr, rblock, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0));
+               bno = select_child(curoffset, kp, pp, INT_GET(rblock->bb_numrecs, ARCH_CONVERT));
+               /* descend, one block per level, until a level 0 block */
+               for (;;) {
+                       set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno),
+                               blkbb, DB_RING_IGN, NULL);
+                       block = (xfs_bmbt_block_t *)iocur_top->data;
+                       if (INT_GET(block->bb_level, ARCH_CONVERT) == 0)
+                               break;
+                       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                               block, 1,
+                               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+                                       xfs_bmbt, 0));
+                       kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                               block, 1,
+                               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+                                       xfs_bmbt, 0));
+                       bno = select_child(curoffset, kp, pp,
+                               INT_GET(block->bb_numrecs, ARCH_CONVERT));
+               }
+               /*
+                * Scan the records of this level 0 block, then follow
+                * bb_rightsib from block to block until the range is
+                * exhausted or there is no right sibling.
+                */
+               for (;;) {
+                       nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+                       nextents = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+                       xp = (xfs_bmbt_rec_64_t *)XFS_BTREE_REC_ADDR(
+                               mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+                               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize,
+                                       xfs_bmbt, 1));
+                       for (ep = xp; ep < &xp[nextents] && n < nex; ep++) {
+                               if (!bmap_one_extent(ep, &curoffset, eoffset,
+                                               &n, bep)) {
+                                       /* past the range: stop the walk */
+                                       nextbno = NULLFSBLOCK;
+                                       break;
+                               }
+                       }
+                       bno = nextbno;
+                       if (bno == NULLFSBLOCK)
+                               break;
+                       set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno),
+                               blkbb, DB_RING_IGN, NULL);
+                       block = (xfs_bmbt_block_t *)iocur_top->data;
+               }
+               pop_cur();
+       }
+       pop_cur();
+       *nexp = n;
+}
+
+/*
+ * "bmap [-ad] [block [len]]": print the block map of the current inode.
+ *
+ * -a selects the attribute fork and -d the data fork; with neither,
+ * each fork whose extent count in the inode core is non-zero is
+ * listed.  "block" is the first file block to report and "len" the
+ * number of blocks; with "block" alone a single block is looked up,
+ * and with no range the listing starts at file block 0 with an end
+ * offset of -1.  One line is printed per extent.  Always returns 0.
+ */
+static int
+bmap_f(
+       int             argc,
+       char            **argv)
+{
+       int             afork = 0;
+       bmap_ext_t      be;
+       int             c;
+       xfs_dfiloff_t   co;
+       xfs_dfiloff_t   cosave;
+       int             dfork = 0;
+       xfs_dinode_t    *dip;
+       xfs_dfiloff_t   eo;
+       xfs_dfilblks_t  len;
+       int             nex;
+       char            *p;
+       int             whichfork;
+
+       if (iocur_top->ino == NULLFSINO) {
+               dbprintf("no current inode\n");
+               return 0;
+       }
+       optind = 0;
+       if (argc) while ((c = getopt(argc, argv, "ad")) != EOF) {
+               switch (c) {
+               case 'a':
+                       afork = 1;
+                       break;
+               case 'd':
+                       dfork = 1;
+                       break;
+               default:
+                       dbprintf("bad option for bmap command\n");
+                       return 0;
+               }
+       }
+       /* no fork requested: pick every fork that has extents */
+       if (afork + dfork == 0) {
+               push_cur();
+               set_cur_inode(iocur_top->ino);
+               dip = iocur_top->data;
+               if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT))
+                       dfork = 1;
+               if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT))
+                       afork = 1;
+               pop_cur();
+       }
+       if (optind < argc) {
+               co = (xfs_dfiloff_t)strtoull(argv[optind], &p, 0);
+               if (*p != '\0') {
+                       dbprintf("bad block number for bmap %s\n",
+                               argv[optind]);
+                       return 0;
+               }
+               optind++;
+               if (optind < argc) {
+                       len = (xfs_dfilblks_t)strtoull(argv[optind], &p, 0);
+                       if (*p != '\0') {
+                               dbprintf("bad len for bmap %s\n", argv[optind]);
+                               return 0;
+                       }
+                       eo = co + len - 1;
+               } else
+                       eo = co;
+       } else {
+               co = 0;
+               eo = -1;
+       }
+       /*
+        * The per-fork loop advances co past each extent it prints, so
+        * remember the requested start and restore it before the next
+        * fork; otherwise the attribute fork listing would begin where
+        * the data fork listing ended.
+        */
+       cosave = co;
+       for (whichfork = XFS_DATA_FORK;
+            whichfork <= XFS_ATTR_FORK;
+            whichfork++) {
+               if (whichfork == XFS_DATA_FORK && !dfork)
+                       continue;
+               if (whichfork == XFS_ATTR_FORK && !afork)
+                       continue;
+               co = cosave;
+               /* fetch one extent at a time until none is left in range */
+               for (;;) {
+                       nex = 1;
+                       bmap(co, eo - co + 1, whichfork, &nex, &be);
+                       if (nex == 0)
+                               break;
+                       dbprintf("%s offset %lld startblock %llu (%u/%u) count "
+                                "%llu flag %u\n",
+                               whichfork == XFS_DATA_FORK ? "data" : "attr",
+                               be.startoff, be.startblock,
+                               XFS_FSB_TO_AGNO(mp, be.startblock),
+                               XFS_FSB_TO_AGBNO(mp, be.startblock),
+                               be.blockcount, be.flag);
+                       co = be.startoff + be.blockcount;
+               }
+       }
+       return 0;
+}
+
+/* Register the bmap command. */
+void
+bmap_init(void)
+{
+       add_command(&bmap_cmd);
+}
+
+/*
+ * Consider one on-disk extent record for the range [*offp, eoff].
+ *
+ * An extent that ends at or before *offp is skipped (returns 1, nothing
+ * stored).  An extent that starts after eoff ends the search (returns
+ * 0).  Otherwise the extent is clipped to the range, stored in
+ * bep[*idxp], *idxp is incremented, *offp is moved to the block after
+ * the stored piece, and 1 is returned.
+ */
+static int
+bmap_one_extent(
+       xfs_bmbt_rec_64_t       *ep,
+       xfs_dfiloff_t           *offp,
+       xfs_dfiloff_t           eoff,
+       int                     *idxp,
+       bmap_ext_t              *bep)
+{
+       xfs_dfilblks_t          count;
+       int                     flag;
+       xfs_dfsbno_t            fsb;
+       bmap_ext_t              *slot;
+       xfs_dfiloff_t           start;
+       xfs_dfiloff_t           wanted = *offp;
+
+       convert_extent(ep, &start, &fsb, &count, &flag);
+
+       /* entirely before the range: keep looking */
+       if (start + count <= wanted)
+               return 1;
+       /* entirely after the range: nothing more to find */
+       if (start > eoff)
+               return 0;
+
+       /* drop the leading blocks that precede the range */
+       if (start < wanted) {
+               count -= wanted - start;
+               fsb += wanted - start;
+               start = wanted;
+       }
+       /* drop the trailing blocks that follow the range */
+       if (start + count - 1 > eoff)
+               count -= (start + count - 1) - eoff;
+
+       slot = &bep[*idxp];
+       slot->startoff = start;
+       slot->startblock = fsb;
+       slot->blockcount = count;
+       slot->flag = flag;
+       *idxp += 1;
+       *offp = start + count;
+       return 1;
+}
+
+/*
+ * Unpack an on-disk extent record with libxfs_bmbt_get_all() and hand
+ * back its start offset (*op), start block (*sp) and block count (*cp).
+ * *fp is set to 1 when the extent state is XFS_EXT_UNWRITTEN and to 0
+ * otherwise.
+ */
+void
+convert_extent(
+       xfs_bmbt_rec_64_t               *rp,
+       xfs_dfiloff_t           *op,
+       xfs_dfsbno_t            *sp,
+       xfs_dfilblks_t          *cp,
+       int                     *fp)
+{
+       xfs_bmbt_irec_t irec;
+
+       libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, &irec);
+
+       *fp = (irec.br_state == XFS_EXT_UNWRITTEN) ? 1 : 0;
+       *op = irec.br_startoff;
+       *sp = irec.br_startblock;
+       *cp = irec.br_blockcount;
+}
+
+/*
+ * Fill bbmap->b[] with one disk address per basic block for every
+ * filesystem block of the nex extents in bmp[], in extent order.
+ * Each filesystem block contributes blkbb consecutive addresses.
+ * NOTE(review): no bound on bbmap->b[] is checked here; the caller
+ * must size the request to fit -- confirm against bbmap_t.
+ */
+void
+make_bbmap(
+       bbmap_t         *bbmap,
+       int             nex,
+       bmap_ext_t      *bmp)
+{
+       int             bb;
+       int             blk;
+       int             ext;
+       xfs_dfsbno_t    fsb;
+       int             slot = 0;
+
+       for (ext = 0; ext < nex; ext++) {
+               for (blk = 0; blk < bmp[ext].blockcount; blk++) {
+                       fsb = bmp[ext].startblock + blk;
+                       for (bb = 0; bb < blkbb; bb++) {
+                               bbmap->b[slot] =
+                                       XFS_FSB_TO_DADDR(mp, fsb) + bb;
+                               slot++;
+                       }
+               }
+       }
+}
+
+/*
+ * Choose the child pointer to follow for file offset off in a btree
+ * node holding nrecs keys kp[] and pointers pp[].
+ *
+ * The first key that is not below off decides: an exact match selects
+ * that entry, a larger key selects the entry before it (or entry 0 if
+ * it is the first).  When every key is below off the last entry is
+ * used.
+ */
+static xfs_fsblock_t
+select_child(
+       xfs_dfiloff_t   off,
+       xfs_bmbt_key_t  *kp,
+       xfs_bmbt_ptr_t  *pp,
+       int             nrecs)
+{
+       int             child = nrecs - 1;
+       int             i;
+
+       for (i = 0; i < nrecs; i++) {
+               if (INT_GET(kp[i].br_startoff, ARCH_CONVERT) < off)
+                       continue;
+               if (INT_GET(kp[i].br_startoff, ARCH_CONVERT) == off || i == 0)
+                       child = i;
+               else
+                       child = i - 1;
+               break;
+       }
+       return INT_GET(pp[child], ARCH_CONVERT);
+}
diff --git a/db/bmap.h b/db/bmap.h
new file mode 100644 (file)
index 0000000..2420601
--- /dev/null
+++ b/db/bmap.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declarations so the prototypes below can take pointers to them. */
+struct bbmap;
+struct xfs_bmbt_rec_64;
+
+/*
+ * One extent as passed to bmap() and make_bbmap().  make_bbmap() reads
+ * startblock as a filesystem block number (it goes through
+ * XFS_FSB_TO_DADDR) and blockcount as the number of such blocks.
+ * NOTE(review): startoff and flag are presumably the file offset and the
+ * unwritten-extent flag produced by convert_extent() -- confirm in bmap().
+ */
+typedef struct bmap_ext {
+       xfs_dfiloff_t   startoff;
+       xfs_dfsbno_t    startblock;
+       xfs_dfilblks_t  blockcount;
+       int             flag;
+} bmap_ext_t;
+
+extern void    bmap(xfs_dfiloff_t offset, xfs_dfilblks_t len, int whichfork,
+                    int *nexp, bmap_ext_t *bep);
+extern void    bmap_init(void);
+/*
+ * Unpack the extent record rp into file offset (*op), start block (*sp),
+ * block count (*cp) and unwritten flag (*fp, 1 or 0).
+ */
+extern void    convert_extent(struct xfs_bmbt_rec_64 *rp, xfs_dfiloff_t *op,
+                              xfs_dfsbno_t *sp, xfs_dfilblks_t *cp, int *fp);
+/* Fill bbmap->b[] with the disk addresses covered by the nex extents in bmp. */
+extern void    make_bbmap(struct bbmap *bbmap, int nex, bmap_ext_t *bmp);
diff --git a/db/bmapbt.c b/db/bmapbt.c
new file mode 100644 (file)
index 0000000..3ecfb37
--- /dev/null
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bmapbt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+/*
+ * Count/offset callbacks referenced by the bmapbta_flds[] and
+ * bmapbtd_flds[] tables below; declared here because the tables precede
+ * the function definitions.
+ */
+static int     bmapbta_key_count(void *obj, int startoff);
+static int     bmapbta_key_offset(void *obj, int startoff, int idx);
+static int     bmapbta_ptr_count(void *obj, int startoff);
+static int     bmapbta_ptr_offset(void *obj, int startoff, int idx);
+static int     bmapbta_rec_count(void *obj, int startoff);
+static int     bmapbta_rec_offset(void *obj, int startoff, int idx);
+static int     bmapbtd_key_count(void *obj, int startoff);
+static int     bmapbtd_key_offset(void *obj, int startoff, int idx);
+static int     bmapbtd_ptr_count(void *obj, int startoff);
+static int     bmapbtd_ptr_offset(void *obj, int startoff, int idx);
+static int     bmapbtd_rec_count(void *obj, int startoff);
+static int     bmapbtd_rec_offset(void *obj, int startoff, int idx);
+
+/*
+ * Header field tables: each holds a single unnamed entry at offset 0 whose
+ * field type (FLDT_BMAPBTA / FLDT_BMAPBTD) stands for the whole block.
+ */
+const field_t  bmapbta_hfld[] = {
+       { "", FLDT_BMAPBTA, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+const field_t  bmapbtd_hfld[] = {
+       { "", FLDT_BMAPBTD, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field tables for a bmap btree block (xfs_bmbt_block_t).  OFF(f) is the
+ * byte offset of member bb_f passed through bitize().  The fixed header
+ * members use constant offsets; "recs", "keys" and "ptrs" take their
+ * offset and count from the callbacks defined later in this file, which
+ * report recs only for level 0 blocks and keys/ptrs only for blocks with
+ * level > 0.  The "a" and "d" tables differ only in the field/type ids
+ * they use (presumably attribute fork vs. data fork -- confirm).
+ */
+#define        OFF(f)  bitize(offsetof(xfs_bmbt_block_t, bb_ ## f))
+const field_t  bmapbta_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTA },
+       { "rightsib", FLDT_DFSBNO, OI(OFF(rightsib)), C1, 0, TYP_BMAPBTA },
+       { "recs", FLDT_BMAPBTAREC, bmapbta_rec_offset, bmapbta_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_BMAPBTAKEY, bmapbta_key_offset, bmapbta_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_BMAPBTAPTR, bmapbta_ptr_offset, bmapbta_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA },
+       { NULL }
+};
+const field_t  bmapbtd_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTD },
+       { "rightsib", FLDT_DFSBNO, OI(OFF(rightsib)), C1, 0, TYP_BMAPBTD },
+       { "recs", FLDT_BMAPBTDREC, bmapbtd_rec_offset, bmapbtd_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_BMAPBTDKEY, bmapbtd_key_offset, bmapbtd_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_BMAPBTDPTR, bmapbtd_ptr_offset, bmapbtd_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD },
+       { NULL }
+};
+
+/*
+ * Field tables for one bmap btree key (xfs_bmbt_key_t).  KOFF(f) is the
+ * byte offset of member br_f passed through bitize().  A key has the
+ * single member startoff.
+ */
+#define        KOFF(f) bitize(offsetof(xfs_bmbt_key_t, br_ ## f))
+const field_t  bmapbta_key_flds[] = {
+       { "startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_ATTR },
+       { NULL }
+};
+const field_t  bmapbtd_key_flds[] = {
+       { "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_INODATA },
+       { NULL }
+};
+
+/*
+ * Field tables for one bmap btree record.  The four members are located
+ * by the BMBT_*_BITOFF constants rather than by offsetof(), i.e. they are
+ * addressed as bit positions inside the record.
+ */
+const field_t  bmapbta_rec_flds[] = {
+       { "startoff", FLDT_CFILEOFFA, OI(BMBT_STARTOFF_BITOFF), C1, 0,
+         TYP_ATTR },
+       { "startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0,
+         TYP_ATTR },
+       { "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0,
+         TYP_NONE },
+       { "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0,
+         TYP_NONE },
+       { NULL }
+};
+const field_t  bmapbtd_rec_flds[] = {
+       { "startoff", FLDT_CFILEOFFD, OI(BMBT_STARTOFF_BITOFF), C1, 0,
+         TYP_INODATA },
+       { "startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0,
+         TYP_INODATA },
+       { "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0,
+         TYP_NONE },
+       { "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0,
+         TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Number of keys in the bmap btree block at obj: bb_numrecs for a block
+ * with a non-zero level, 0 for a level 0 block.
+ */
+static int
+bmapbta_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of key idx from the start of the bmap btree block at obj, as
+ * located by XFS_BTREE_KEY_ADDR and converted with bitize().  Only valid
+ * for blocks with level > 0.
+ */
+static int
+bmapbta_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_key_t          *key;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+       byteoff = (int)((char *)key - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of child pointers in the bmap btree block at obj: bb_numrecs for
+ * a block with a non-zero level, 0 for a level 0 block.
+ */
+static int
+bmapbta_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of child pointer idx from the start of the bmap btree block at
+ * obj, as located by XFS_BTREE_PTR_ADDR and converted with bitize().
+ * Only valid for blocks with level > 0.
+ */
+static int
+bmapbta_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_ptr_t          *ptr;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+       byteoff = (int)((char *)ptr - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of records in the bmap btree block at obj: bb_numrecs for a
+ * level 0 block, 0 for a block with level > 0.
+ */
+static int
+bmapbta_rec_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) > 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of record idx from the start of the bmap btree block at obj, as
+ * located by XFS_BTREE_REC_ADDR and converted with bitize().  Only valid
+ * for level 0 blocks.
+ */
+static int
+bmapbta_rec_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_rec_t          *rec;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) == 0);
+       rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1));
+       byteoff = (int)((char *)rec - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Size of a bmap btree block: the superblock's sb_blocksize passed
+ * through bitize().  None of the arguments are used.
+ */
+int
+bmapbta_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Number of keys in the bmap btree block at obj: bb_numrecs for a block
+ * with a non-zero level, 0 for a level 0 block.
+ */
+static int
+bmapbtd_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of key idx from the start of the bmap btree block at obj, as
+ * located by XFS_BTREE_KEY_ADDR and converted with bitize().  Only valid
+ * for blocks with level > 0.
+ */
+static int
+bmapbtd_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_key_t          *key;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+       byteoff = (int)((char *)key - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of child pointers in the bmap btree block at obj: bb_numrecs for
+ * a block with a non-zero level, 0 for a level 0 block.
+ */
+static int
+bmapbtd_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of child pointer idx from the start of the bmap btree block at
+ * obj, as located by XFS_BTREE_PTR_ADDR and converted with bitize().
+ * Only valid for blocks with level > 0.
+ */
+static int
+bmapbtd_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_ptr_t          *ptr;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0));
+       byteoff = (int)((char *)ptr - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of records in the bmap btree block at obj: bb_numrecs for a
+ * level 0 block, 0 for a block with level > 0.
+ */
+static int
+bmapbtd_rec_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmbt_block_t        *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) > 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of record idx from the start of the bmap btree block at obj, as
+ * located by XFS_BTREE_REC_ADDR and converted with bitize().  Only valid
+ * for level 0 blocks.
+ */
+static int
+bmapbtd_rec_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmbt_block_t        *bp = obj;
+       xfs_bmbt_rec_t          *rec;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) == 0);
+       rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1));
+       byteoff = (int)((char *)rec - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Size of a bmap btree block: the superblock's sb_blocksize passed
+ * through bitize().  None of the arguments are used.
+ */
+int
+bmapbtd_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/bmapbt.h b/db/bmapbt.h
new file mode 100644 (file)
index 0000000..8f39c98
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration; only arrays of struct field are named below. */
+struct field;
+
+/* Field tables for bmap btree blocks, their keys and their records. */
+extern const struct field      bmapbta_flds[];
+extern const struct field      bmapbta_hfld[];
+extern const struct field      bmapbta_key_flds[];
+extern const struct field      bmapbta_rec_flds[];
+extern const struct field      bmapbtd_flds[];
+extern const struct field      bmapbtd_hfld[];
+extern const struct field      bmapbtd_key_flds[];
+extern const struct field      bmapbtd_rec_flds[];
+
+/* Both return the filesystem block size passed through bitize(). */
+extern int     bmapbta_size(void *obj, int startoff, int idx);
+extern int     bmapbtd_size(void *obj, int startoff, int idx);
diff --git a/db/bmroot.c b/db/bmroot.c
new file mode 100644 (file)
index 0000000..a96c6d2
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bmroot.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+/*
+ * Count/offset callbacks referenced by the bmroota_flds[] and
+ * bmrootd_flds[] tables below; declared here because the tables precede
+ * the function definitions.
+ */
+static int     bmroota_key_count(void *obj, int startoff);
+static int     bmroota_key_offset(void *obj, int startoff, int idx);
+static int     bmroota_ptr_count(void *obj, int startoff);
+static int     bmroota_ptr_offset(void *obj, int startoff, int idx);
+static int     bmrootd_key_count(void *obj, int startoff);
+static int     bmrootd_key_offset(void *obj, int startoff, int idx);
+static int     bmrootd_ptr_count(void *obj, int startoff);
+static int     bmrootd_ptr_offset(void *obj, int startoff, int idx);
+
+/*
+ * Field tables for a bmap btree root block (xfs_bmdr_block_t).  OFF(f) is
+ * the byte offset of member bb_f passed through bitize().  A root has only
+ * level, numrecs, keys and ptrs; the "a" table's callbacks work on the
+ * block at XFS_DFORK_APTR() of the inode and the "d" table's on the block
+ * at XFS_DFORK_DPTR().
+ */
+#define        OFF(f)  bitize(offsetof(xfs_bmdr_block_t, bb_ ## f))
+const field_t  bmroota_flds[] = {
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "keys", FLDT_BMROOTAKEY, bmroota_key_offset, bmroota_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_BMROOTAPTR, bmroota_ptr_offset, bmroota_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA },
+       { NULL }
+};
+const field_t  bmrootd_flds[] = {
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "keys", FLDT_BMROOTDKEY, bmrootd_key_offset, bmrootd_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_BMROOTDPTR, bmrootd_ptr_offset, bmrootd_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD },
+       { NULL }
+};
+
+/*
+ * Field tables for one root-block key (xfs_bmdr_key_t).  KOFF(f) is the
+ * byte offset of member br_f passed through bitize().  A key has the
+ * single member startoff.
+ */
+#define        KOFF(f) bitize(offsetof(xfs_bmdr_key_t, br_ ## f))
+const field_t  bmroota_key_flds[] = {
+       { "startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_NONE },
+       { NULL }
+};
+const field_t  bmrootd_key_flds[] = {
+       { "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+static int
+bmroota_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmdr_block_t        *block;
+#ifdef DEBUG
+       xfs_dinode_t            *dip = obj;
+#endif
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip));
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+       return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of key idx from the start of the attribute-fork btree root, as
+ * located by XFS_BTREE_KEY_ADDR and converted with bitize().  The root is
+ * found byteize(startoff) bytes into the inode at obj; the maximum record
+ * count is derived from XFS_DFORK_ASIZE().
+ */
+static int
+bmroota_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmdr_block_t        *root;
+       /* REFERENCED */
+       xfs_dinode_t            *dip = obj;
+       xfs_bmdr_key_t          *key;
+       int                     byteoff;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+       ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+       key = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, root, idx,
+               XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0));
+       byteoff = (int)((char *)key - (char *)root);
+       return bitize(byteoff);
+}
+
+static int
+bmroota_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmdr_block_t        *block;
+#ifdef DEBUG
+       xfs_dinode_t            *dip = obj;
+#endif
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip));
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+       return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of child pointer idx from the start of the attribute-fork btree
+ * root, as located by XFS_BTREE_PTR_ADDR and converted with bitize().
+ * The root is found byteize(startoff) bytes into the inode at obj; the
+ * maximum record count is derived from XFS_DFORK_ASIZE().
+ */
+static int
+bmroota_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmdr_block_t        *root;
+       xfs_dinode_t            *dip = obj;
+       xfs_bmdr_ptr_t          *ptr;
+       int                     byteoff;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(XFS_DFORK_Q(dip) && (char *)root == XFS_DFORK_APTR(dip));
+       ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+       ptr = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, root, idx,
+               XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0));
+       byteoff = (int)((char *)ptr - (char *)root);
+       return bitize(byteoff);
+}
+
+int
+bmroota_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dinode_t            *dip;
+#ifdef DEBUG
+       xfs_bmdr_block_t        *block;
+#endif
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT(idx == 0);
+       dip = obj;
+#ifdef DEBUG
+       block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip));
+#endif
+       return bitize((int)XFS_DFORK_ASIZE(dip, mp));
+}
+
+static int
+bmrootd_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmdr_block_t        *block;
+#ifdef DEBUG
+       xfs_dinode_t            *dip = obj;
+#endif
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT((char *)block == XFS_DFORK_DPTR(dip));
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+       return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of key idx from the start of the data-fork btree root, as
+ * located by XFS_BTREE_KEY_ADDR and converted with bitize().  The root is
+ * found byteize(startoff) bytes into the inode at obj; the maximum record
+ * count is derived from XFS_DFORK_DSIZE().
+ */
+static int
+bmrootd_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmdr_block_t        *root;
+       xfs_bmdr_key_t          *key;
+       xfs_dinode_t            *dip = obj;
+       int                     byteoff;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+       key = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, root, idx,
+               XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0));
+       byteoff = (int)((char *)key - (char *)root);
+       return bitize(byteoff);
+}
+
+static int
+bmrootd_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_bmdr_block_t        *block;
+#ifdef DEBUG
+       xfs_dinode_t            *dip = obj;
+#endif
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT((char *)block == XFS_DFORK_DPTR(dip));
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+       return INT_GET(block->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of child pointer idx from the start of the data-fork btree root,
+ * as located by XFS_BTREE_PTR_ADDR and converted with bitize().  The root
+ * is found byteize(startoff) bytes into the inode at obj; the maximum
+ * record count is derived from XFS_DFORK_DSIZE().
+ */
+static int
+bmrootd_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_bmdr_block_t        *root;
+       xfs_bmdr_ptr_t          *ptr;
+       xfs_dinode_t            *dip = obj;
+       int                     byteoff;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       root = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff));
+       ASSERT(INT_GET(root->bb_level, ARCH_CONVERT) > 0);
+       ptr = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, root, idx,
+               XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0));
+       byteoff = (int)((char *)ptr - (char *)root);
+       return bitize(byteoff);
+}
+
+/*
+ * Size of the data-fork btree root: XFS_DFORK_DSIZE() of the inode at
+ * obj, converted with bitize().  Debug builds check that startoff is
+ * byte aligned, that obj is the current I/O buffer and that idx is 0.
+ */
+int
+bmrootd_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dinode_t            *dip = obj;
+       int                     nbytes;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT(idx == 0);
+       nbytes = (int)XFS_DFORK_DSIZE(dip, mp);
+       return bitize(nbytes);
+}
diff --git a/db/bmroot.h b/db/bmroot.h
new file mode 100644 (file)
index 0000000..3f8ef0c
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration; only arrays of struct field are named below. */
+struct field;
+
+/* Field tables for bmap btree root blocks and their keys. */
+extern const struct field      bmroota_flds[];
+extern const struct field      bmroota_key_flds[];
+extern const struct field      bmrootd_flds[];
+extern const struct field      bmrootd_key_flds[];
+
+/*
+ * Root sizes: bmroota_size() is based on XFS_DFORK_ASIZE() and
+ * bmrootd_size() on XFS_DFORK_DSIZE(), each passed through bitize().
+ */
+extern int     bmroota_size(void *obj, int startoff, int idx);
+extern int     bmrootd_size(void *obj, int startoff, int idx);
diff --git a/db/bnobt.c b/db/bnobt.c
new file mode 100644 (file)
index 0000000..3b09605
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bnobt.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+/*
+ * Count/offset callbacks referenced by bnobt_flds[] below; declared here
+ * because the table precedes the function definitions.
+ */
+static int     bnobt_key_count(void *obj, int startoff);
+static int     bnobt_key_offset(void *obj, int startoff, int idx);
+static int     bnobt_ptr_count(void *obj, int startoff);
+static int     bnobt_ptr_offset(void *obj, int startoff, int idx);
+static int     bnobt_rec_count(void *obj, int startoff);
+static int     bnobt_rec_offset(void *obj, int startoff, int idx);
+
+/*
+ * Header field table: a single unnamed entry at offset 0 whose field type
+ * (FLDT_BNOBT) stands for the whole block.
+ */
+const field_t  bnobt_hfld[] = {
+       { "", FLDT_BNOBT, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field table for a btree block of type xfs_alloc_block_t.  OFF(f) is the
+ * byte offset of member bb_f passed through bitize().  "recs", "keys" and
+ * "ptrs" take their offset and count from the callbacks defined later in
+ * this file, which report recs only for level 0 blocks and keys/ptrs only
+ * for blocks with level > 0.
+ */
+#define        OFF(f)  bitize(offsetof(xfs_alloc_block_t, bb_ ## f))
+const field_t  bnobt_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_BNOBT },
+       { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_BNOBT },
+       { "recs", FLDT_BNOBTREC, bnobt_rec_offset, bnobt_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_BNOBTKEY, bnobt_key_offset, bnobt_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_BNOBTPTR, bnobt_ptr_offset, bnobt_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BNOBT },
+       { NULL }
+};
+
+/*
+ * Field table for one key (xfs_alloc_key_t).  KOFF(f) is the byte offset
+ * of member ar_f passed through bitize().
+ */
+#define        KOFF(f) bitize(offsetof(xfs_alloc_key_t, ar_ ## f))
+const field_t  bnobt_key_flds[] = {
+       { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+       { "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field table for one record (xfs_alloc_rec_t).  ROFF(f) is the byte
+ * offset of member ar_f passed through bitize().
+ */
+#define        ROFF(f) bitize(offsetof(xfs_alloc_rec_t, ar_ ## f))
+const field_t  bnobt_rec_flds[] = {
+       { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+       { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Number of keys in the btree block at obj: bb_numrecs for a block with a
+ * non-zero level, 0 for a level 0 block.
+ */
+static int
+bnobt_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of key idx from the start of the btree block at obj, as located
+ * by XFS_BTREE_KEY_ADDR and converted with bitize().  Only valid for
+ * blocks with level > 0.
+ */
+static int
+bnobt_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *bp = obj;
+       xfs_alloc_key_t         *key;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       byteoff = (int)((char *)key - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of child pointers in the btree block at obj: bb_numrecs for a
+ * block with a non-zero level, 0 for a level 0 block.
+ */
+static int
+bnobt_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) == 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of child pointer idx from the start of the btree block at obj,
+ * as located by XFS_BTREE_PTR_ADDR and converted with bitize().  Only
+ * valid for blocks with level > 0.
+ */
+static int
+bnobt_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *bp = obj;
+       xfs_alloc_ptr_t         *ptr;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       ptr = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       byteoff = (int)((char *)ptr - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Number of records in the btree block at obj: bb_numrecs for a level 0
+ * block, 0 for a block with level > 0.
+ */
+static int
+bnobt_rec_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) > 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset of record idx from the start of the btree block at obj, as
+ * located by XFS_BTREE_REC_ADDR and converted with bitize().  Only valid
+ * for level 0 blocks.
+ */
+static int
+bnobt_rec_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *bp = obj;
+       xfs_alloc_rec_t         *rec;
+       int                     byteoff;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) == 0);
+       rec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+       byteoff = (int)((char *)rec - (char *)bp);
+       return bitize(byteoff);
+}
+
+/*
+ * Size of a btree block: the superblock's sb_blocksize passed through
+ * bitize().  None of the arguments are used.
+ */
+int
+bnobt_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_sb.sb_blocksize);
+}
diff --git a/db/bnobt.h b/db/bnobt.h
new file mode 100644 (file)
index 0000000..07e8b26
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field      bnobt_flds[];
+extern const struct field      bnobt_hfld[];
+extern const struct field      bnobt_key_flds[];
+extern const struct field      bnobt_rec_flds[];
+
+extern int     bnobt_size(void *obj, int startoff, int idx);
diff --git a/db/check.c b/db/check.c
new file mode 100644 (file)
index 0000000..b404422
--- /dev/null
@@ -0,0 +1,4468 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <math.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include "bmap.h"
+#include "check.h"
+#include "command.h"
+#include "data.h"
+#include "io.h"
+#include "output.h"
+#include "type.h"
+#include "mount.h"
+#include "malloc.h"
+
+/*
+ * Block classification codes.  DBM_NDBM is last, so its value equals the
+ * number of real classifications.  The typename[] string table in this
+ * file lists one name per enumerator in the same order, so the two must
+ * be kept in step when a code is added.
+ */
+typedef enum {
+       DBM_UNKNOWN,    DBM_AGF,        DBM_AGFL,       DBM_AGI,
+       DBM_ATTR,       DBM_BTBMAPA,    DBM_BTBMAPD,    DBM_BTBNO,
+       DBM_BTCNT,      DBM_BTINO,      DBM_DATA,       DBM_DIR,
+       DBM_FREE1,      DBM_FREE2,      DBM_FREELIST,   DBM_INODE,
+       DBM_LOG,        DBM_MISSING,    DBM_QUOTA,      DBM_RTBITMAP,
+       DBM_RTDATA,     DBM_RTFREE,     DBM_RTSUM,      DBM_SB,
+       DBM_SYMLINK,
+       DBM_NDBM
+} dbm_t;
+
+/*
+ * Per-inode bookkeeping record, looked up with find_inode().
+ *
+ *   link_add  incremented once per call to addlink_inode()
+ *   ilist     set to 1 by add_ilist(); enables per-inode trace output
+ *   parent    record of the parent inode, stored by addparent_inode()
+ *             (NULL when find_inode() could not supply one)
+ *   name      NUL-terminated copy made by addname_inode() when nflag is
+ *             set; only the first name seen is kept
+ *
+ * NOTE(review): next is presumably the hash-chain link and link_set the
+ * link count taken from the inode itself (see setlink_inode()) - confirm.
+ * The three macros presumably bound and tune inodata_hash_size - confirm
+ * against init().
+ */
+typedef struct inodata {
+       struct inodata  *next;
+       nlink_t         link_set;
+       nlink_t         link_add;
+       char            isdir;
+       char            security;
+       char            ilist;
+       xfs_ino_t       ino;
+       struct inodata  *parent;
+       char            *name;
+} inodata_t;
+#define        MIN_INODATA_HASH_SIZE   256
+#define        MAX_INODATA_HASH_SIZE   65536
+#define        INODATA_AVG_HASH_LENGTH 8
+
+/*
+ * Triple of xfs_qcnt_t counters.
+ * NOTE(review): bc/ic/rc are presumably block, inode and realtime-block
+ * counts - confirm against quota_add1().
+ */
+typedef struct qinfo {
+       xfs_qcnt_t      bc;
+       xfs_qcnt_t      ic;
+       xfs_qcnt_t      rc;
+} qinfo_t;
+
+/*
+ * Singly linked quota record for one id, carrying two qinfo_t triples.
+ * NOTE(review): presumably "count" is what the checker tallied and "dq"
+ * is what the on-disk dquot says, with QDATA_HASH_SIZE buckets per table
+ * - confirm against quota_add1()/quota_check().
+ */
+#define        QDATA_HASH_SIZE 256
+typedef        struct qdata {
+       struct qdata    *next;
+       xfs_dqid_t      id;
+       qinfo_t         count;
+       qinfo_t         dq;
+} qdata_t;
+
+/*
+ * One run of nblks consecutive file offsets beginning at startoff.
+ * blks[i] is the filesystem block backing file offset startoff + i.
+ * blks[] is declared with one element but allocated with room for the
+ * whole run; BLKENT_SIZE(n) is the byte size needed for n blocks.
+ */
+typedef struct blkent {
+       xfs_fileoff_t   startoff;
+       int             nblks;
+       xfs_fsblock_t   blks[1];
+} blkent_t;
+#define        BLKENT_SIZE(n)  \
+       (offsetof(blkent_t, blks) + (sizeof(xfs_fsblock_t) * (n)))
+
+/*
+ * Growable array of blkent_t pointers describing a file's block mapping.
+ * naents is the number of slots allocated in ents[], nents the number in
+ * use.  ents[] is declared with one element but allocated larger;
+ * BLKMAP_SIZE(n) is the byte size needed for n slots.
+ */
+typedef        struct blkmap {
+       int             naents;
+       int             nents;
+       blkent_t        *ents[1];
+} blkmap_t;
+#define        BLKMAP_SIZE(n)  \
+       (offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
+
+/*
+ * Variable-length table of xfs_dir2_data_off_t values; FREETAB_SIZE(n)
+ * is the byte size needed for n entries.
+ * NOTE(review): naents/nents presumably mean allocated/used slots as in
+ * blkmap_t - confirm against the directory v2 code that fills it.
+ */
+typedef struct freetab {
+       int                     naents;
+       int                     nents;
+       xfs_dir2_data_off_t     ents[1];
+} freetab_t;
+#define        FREETAB_SIZE(n) \
+       (offsetof(freetab_t, ents) + (sizeof(xfs_dir2_data_off_t) * (n)))
+
+/*
+ * Chained record holding one xfs_dir2_leaf_entry_t plus a "seen" flag.
+ * DIR_HASH_FUNC(h,a) picks a bucket by XOR-ing its two arguments and
+ * reducing modulo DIR_HASH_SIZE.
+ * NOTE(review): h and a are presumably the name hash and data address
+ * passed to dir_hash_add()/dir_hash_see() - confirm.
+ */
+typedef struct dirhash {
+       struct dirhash          *next;
+       xfs_dir2_leaf_entry_t   entry;
+       int                     seen;
+} dirhash_t;
+#define        DIR_HASH_SIZE   1024
+#define        DIR_HASH_FUNC(h,a)      (((h) ^ (a)) % DIR_HASH_SIZE)
+
+static xfs_extlen_t    agffreeblks;
+static xfs_extlen_t    agflongest;
+static xfs_agino_t     agicount;
+static xfs_agino_t     agifreecount;
+static xfs_fsblock_t   *blist;
+static int             blist_size;
+static char            **dbmap;        /* really dbm_t:8 */
+static dirhash_t       **dirhash;
+static int             error;
+static __uint64_t      fdblocks;
+static __uint64_t      frextents;
+static __uint64_t      icount;
+static __uint64_t      ifree;
+static inodata_t       ***inodata;
+static int             inodata_hash_size;
+static inodata_t       ***inomap;
+static int             nflag;
+static int             pflag;
+static qdata_t         **qpdata;
+static int             qpdo;
+static qdata_t         **qudata;
+static int             qudo;
+static unsigned                sbversion;
+static int             sbver_err;
+static int             serious_error;
+static int             sflag;
+static xfs_suminfo_t   *sumcompute;
+static xfs_suminfo_t   *sumfile;
+/*
+ * Printable names for block classifications, listed in the same order as
+ * the dbm_t enumerators, with a NULL in the DBM_NDBM position.
+ */
+static const char      *typename[] = {
+       "unknown",
+       "agf",
+       "agfl",
+       "agi",
+       "attr",
+       "btbmapa",
+       "btbmapd",
+       "btbno",
+       "btcnt",
+       "btino",
+       "data",
+       "dir",
+       "free1",
+       "free2",
+       "freelist",
+       "inode",
+       "log",
+       "missing",
+       "quota",
+       "rtbitmap",
+       "rtdata",
+       "rtfree",
+       "rtsum",
+       "sb",
+       "symlink",
+       NULL
+};
+static int             verbose;
+
+/*
+ * Both macros are nonzero only when blist_size is nonzero and
+ * check_blist() returns nonzero; the && skips the call entirely while
+ * blist is empty.  CHECK_BLIST takes a filesystem block number;
+ * CHECK_BLISTA takes an (a, b) pair and converts it with
+ * XFS_AGB_TO_FSB() first.
+ */
+#define        CHECK_BLIST(b)  (blist_size && check_blist(b))
+#define        CHECK_BLISTA(a,b)       \
+       (blist_size && check_blist(XFS_AGB_TO_FSB(mp, a, b)))
+
+typedef void   (*scan_lbtree_f_t)(xfs_btree_lblock_t   *block,
+                                  int                  level,
+                                  dbm_t                type,
+                                  xfs_fsblock_t        bno,
+                                  inodata_t            *id,
+                                  xfs_drfsbno_t        *totd,
+                                  xfs_drfsbno_t        *toti,
+                                  xfs_extnum_t         *nex,
+                                  blkmap_t             **blkmapp,
+                                  int                  isroot,
+                                  typnm_t              btype);
+
+typedef void   (*scan_sbtree_f_t)(xfs_btree_sblock_t   *block,
+                                  int                  level,
+                                  xfs_agf_t            *agf,
+                                  xfs_agblock_t        bno,
+                                  int                  isroot);
+
+static void            add_blist(xfs_fsblock_t bno);
+static void            add_ilist(xfs_ino_t ino);
+static void            addlink_inode(inodata_t *id);
+static void            addname_inode(inodata_t *id, char *name, int namelen);
+static void            addparent_inode(inodata_t *id, xfs_ino_t parent);
+static void            blkent_append(blkent_t **entp, xfs_fsblock_t b,
+                                     xfs_extlen_t c);
+static blkent_t                *blkent_new(xfs_fileoff_t o, xfs_fsblock_t b,
+                                   xfs_extlen_t c);
+static void            blkent_prepend(blkent_t **entp, xfs_fsblock_t b,
+                                      xfs_extlen_t c);
+static blkmap_t                *blkmap_alloc(xfs_extnum_t);
+static void            blkmap_free(blkmap_t *blkmap);
+static xfs_fsblock_t   blkmap_get(blkmap_t *blkmap, xfs_fileoff_t o);
+static int             blkmap_getn(blkmap_t *blkmap, xfs_fileoff_t o, int nb,
+                                   bmap_ext_t **bmpp);
+static void            blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+                                   blkent_t *newent);
+static xfs_fileoff_t   blkmap_next_off(blkmap_t *blkmap, xfs_fileoff_t o,
+                                       int *t);
+static void            blkmap_set_blk(blkmap_t **blkmapp, xfs_fileoff_t o,
+                                      xfs_fsblock_t b);
+static void            blkmap_set_ext(blkmap_t **blkmapp, xfs_fileoff_t o,
+                                      xfs_fsblock_t b, xfs_extlen_t c);
+static void            blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
+static int             blockfree_f(int argc, char **argv);
+static int             blockget_f(int argc, char **argv);
+#ifdef DEBUG
+static int             blocktrash_f(int argc, char **argv);
+#endif
+static int             blockuse_f(int argc, char **argv);
+static int             check_blist(xfs_fsblock_t bno);
+static void            check_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                   xfs_extlen_t len, dbm_t type);
+static int             check_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                    xfs_extlen_t len, xfs_ino_t c_ino);
+static void            check_linkcounts(xfs_agnumber_t agno);
+static int             check_range(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                   xfs_extlen_t len);
+static void            check_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                    dbm_t type);
+static int             check_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                     xfs_ino_t c_ino);
+static void            check_rootdir(void);
+static int             check_rrange(xfs_drfsbno_t bno, xfs_extlen_t len);
+static void            check_set_dbmap(xfs_agnumber_t agno,
+                                       xfs_agblock_t agbno, xfs_extlen_t len,
+                                       dbm_t type1, dbm_t type2,
+                                       xfs_agnumber_t c_agno,
+                                       xfs_agblock_t c_agbno);
+static void            check_set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                        dbm_t type1, dbm_t type2);
+static void            check_summary(void);
+static void            checknot_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                      xfs_extlen_t len, int typemask);
+static void            checknot_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                       int typemask);
+static void            dir_hash_add(xfs_dahash_t hash,
+                                    xfs_dir2_dataptr_t addr);
+static void            dir_hash_check(inodata_t *id, int v);
+static void            dir_hash_done(void);
+static void            dir_hash_init(void);
+static int             dir_hash_see(xfs_dahash_t hash,
+                                    xfs_dir2_dataptr_t addr);
+static inodata_t       *find_inode(xfs_ino_t ino, int add);
+static void            free_inodata(xfs_agnumber_t agno);
+static int             init(int argc, char **argv);
+static char            *inode_name(xfs_ino_t ino, inodata_t **ipp);
+static int             ncheck_f(int argc, char **argv);
+static char            *prepend_path(char *oldpath, char *parent);
+static xfs_ino_t       process_block_dir_v2(blkmap_t *blkmap, int *dot,
+                                            int *dotdot, inodata_t *id);
+static void            process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs,
+                                            dbm_t type, inodata_t *id,
+                                            xfs_drfsbno_t *tot,
+                                            blkmap_t **blkmapp);
+static void            process_btinode(inodata_t *id, xfs_dinode_t *dip,
+                                       dbm_t type, xfs_drfsbno_t *totd,
+                                       xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+                                       blkmap_t **blkmapp, int whichfork);
+static xfs_ino_t       process_data_dir_v2(int *dot, int *dotdot,
+                                           inodata_t *id, int v,
+                                           xfs_dablk_t dabno,
+                                           freetab_t **freetabp);
+static xfs_dir2_data_free_t
+                       *process_data_dir_v2_freefind(xfs_dir2_data_t *data,
+                                                  xfs_dir2_data_unused_t *dup);
+static void            process_dir(xfs_dinode_t *dip, blkmap_t *blkmap,
+                                   inodata_t *id);
+static int             process_dir_v1(xfs_dinode_t *dip, blkmap_t *blkmap,
+                                      int *dot, int *dotdot, inodata_t *id,
+                                      xfs_ino_t *parent);
+static int             process_dir_v2(xfs_dinode_t *dip, blkmap_t *blkmap,
+                                      int *dot, int *dotdot, inodata_t *id,
+                                      xfs_ino_t *parent);
+static void            process_exinode(inodata_t *id, xfs_dinode_t *dip,
+                                       dbm_t type, xfs_drfsbno_t *totd,
+                                       xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+                                       blkmap_t **blkmapp, int whichfork);
+static void            process_inode(xfs_agf_t *agf, xfs_agino_t agino,
+                                     xfs_dinode_t *dip, int isfree);
+static void            process_lclinode(inodata_t *id, xfs_dinode_t *dip,
+                                        dbm_t type, xfs_drfsbno_t *totd,
+                                        xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+                                        blkmap_t **blkmapp, int whichfork);
+static xfs_ino_t       process_leaf_dir_v1(blkmap_t *blkmap, int *dot,
+                                           int *dotdot, inodata_t *id);
+static xfs_ino_t       process_leaf_dir_v1_int(int *dot, int *dotdot,
+                                               inodata_t *id);
+static xfs_ino_t       process_leaf_node_dir_v2(blkmap_t *blkmap, int *dot,
+                                                int *dotdot, inodata_t *id,
+                                                xfs_fsize_t dirsize);
+static void            process_leaf_node_dir_v2_free(inodata_t *id, int v,
+                                                     xfs_dablk_t dbno,
+                                                     freetab_t *freetab);
+static void            process_leaf_node_dir_v2_int(inodata_t *id, int v,
+                                                    xfs_dablk_t dbno,
+                                                    freetab_t *freetab);
+static xfs_ino_t       process_node_dir_v1(blkmap_t *blkmap, int *dot,
+                                           int *dotdot, inodata_t *id);
+static void            process_quota(int isproj, inodata_t *id,
+                                     blkmap_t *blkmap);
+static void            process_rtbitmap(blkmap_t *blkmap);
+static void            process_rtsummary(blkmap_t *blkmap);
+static xfs_ino_t       process_sf_dir_v2(xfs_dinode_t *dip, int *dot,
+                                         int *dotdot, inodata_t *id);
+static xfs_ino_t       process_shortform_dir_v1(xfs_dinode_t *dip, int *dot,
+                                                int *dotdot, inodata_t *id);
+static void            quota_add(xfs_dqid_t projid, xfs_dqid_t userid,
+                                 int dq, xfs_qcnt_t bc, xfs_qcnt_t ic,
+                                 xfs_qcnt_t rc);
+static void            quota_add1(qdata_t **qt, xfs_dqid_t id, int dq,
+                                  xfs_qcnt_t bc, xfs_qcnt_t ic,
+                                  xfs_qcnt_t rc);
+static void            quota_check(char *s, qdata_t **qt);
+static void            quota_init(void);
+static void            scan_ag(xfs_agnumber_t agno);
+static void            scan_freelist(xfs_agf_t *agf);
+static void            scan_lbtree(xfs_fsblock_t root, int nlevels,
+                                   scan_lbtree_f_t func, dbm_t type,
+                                   inodata_t *id, xfs_drfsbno_t *totd,
+                                   xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+                                   blkmap_t **blkmapp, int isroot,
+                                   typnm_t btype);
+static void            scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root,
+                                   int nlevels, int isroot,
+                                   scan_sbtree_f_t func, typnm_t btype);
+static void            scanfunc_bmap(xfs_btree_lblock_t *ablock, int level,
+                                     dbm_t type, xfs_fsblock_t bno,
+                                     inodata_t *id, xfs_drfsbno_t *totd,
+                                     xfs_drfsbno_t *toti, xfs_extnum_t *nex,
+                                     blkmap_t **blkmapp, int isroot,
+                                     typnm_t btype);
+static void            scanfunc_bno(xfs_btree_sblock_t *ablock, int level,
+                                    xfs_agf_t *agf, xfs_agblock_t bno,
+                                    int isroot);
+static void            scanfunc_cnt(xfs_btree_sblock_t *ablock, int level,
+                                    xfs_agf_t *agf, xfs_agblock_t bno,
+                                    int isroot);
+static void            scanfunc_ino(xfs_btree_sblock_t *ablock, int level,
+                                    xfs_agf_t *agf, xfs_agblock_t bno,
+                                    int isroot);
+static void            set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                 xfs_extlen_t len, dbm_t type,
+                                 xfs_agnumber_t c_agno, xfs_agblock_t c_agbno);
+static void            set_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                                  xfs_extlen_t len, inodata_t *id);
+static void            set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                  dbm_t type);
+static void            set_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len,
+                                   inodata_t *id);
+static void            setlink_inode(inodata_t *id, nlink_t nlink, int isdir,
+                                      int security);
+
+static const cmdinfo_t blockfree_cmd = 
+       { "blockfree", NULL, blockfree_f, 0, 0, 0,
+         NULL, "free block usage information", NULL };
+static const cmdinfo_t blockget_cmd = 
+       { "blockget", "check", blockget_f, 0, -1, 0,
+         "[-s|-v] [-n] [-b bno]... [-i ino] ...",
+         "get block usage and check consistency", NULL };
+#ifdef DEBUG
+static const cmdinfo_t blocktrash_cmd = 
+       { "blocktrash", NULL, blocktrash_f, 0, -1, 0,
+         "[-n count] [-x minlen] [-y maxlen] [-s seed] [-0123] [-t type] ...",
+         "trash randomly selected block(s)", NULL };
+#endif
+static const cmdinfo_t blockuse_cmd = 
+       { "blockuse", NULL, blockuse_f, 0, 3, 0,
+         "[-n] [-c blockcount]",
+         "print usage for current block(s)", NULL };
+static const cmdinfo_t ncheck_cmd = 
+       { "ncheck", NULL, ncheck_f, 0, -1, 0,
+         "[-s] [-i ino] ...",
+         "print inode-name pairs", NULL };
+
+
+/*
+ * Append bno to the blist array, enlarging the array by one element.
+ */
+static void
+add_blist(
+       xfs_fsblock_t   bno)
+{
+       int             slot = blist_size;
+
+       blist_size = slot + 1;
+       blist = xrealloc(blist, blist_size * sizeof(bno));
+       blist[slot] = bno;
+}
+
+/*
+ * Flag inode ino for tracing by setting ilist on its record.  Prints a
+ * complaint and does nothing else when find_inode() returns NULL.
+ */
+static void
+add_ilist(
+       xfs_ino_t       ino)
+{
+       inodata_t       *entry = find_inode(ino, 1);
+
+       if (entry != NULL) {
+               entry->ilist = 1;
+               return;
+       }
+       dbprintf("-i %lld bad inode number\n", ino);
+}
+
+/*
+ * Count one more link to the inode, reporting the new total when running
+ * verbose or when the inode is on the trace list.
+ */
+static void
+addlink_inode(
+       inodata_t       *id)
+{
+       id->link_add += 1;
+       if (!verbose && !id->ilist)
+               return;
+       dbprintf("inode %lld add link, now %u\n", id->ino, id->link_add);
+}
+
+/*
+ * Remember a name for the inode: stores a NUL-terminated copy of the
+ * first namelen bytes of name.  Does nothing unless nflag is set, and
+ * never replaces a name already recorded.
+ */
+static void
+addname_inode(
+       inodata_t       *id,
+       char            *name,
+       int             namelen)
+{
+       char            *copy;
+
+       if (nflag && !id->name) {
+               copy = xmalloc(namelen + 1);
+               memcpy(copy, name, namelen);
+               copy[namelen] = '\0';
+               id->name = copy;
+       }
+}
+
+/*
+ * Record the parent of an inode.  The parent's record comes from
+ * find_inode() and may be NULL; it is stored either way.  Reports the
+ * pairing when verbose, or when either inode is on the trace list.
+ */
+static void
+addparent_inode(
+       inodata_t       *id,
+       xfs_ino_t       parent)
+{
+       inodata_t       *pid = find_inode(parent, 1);
+       int             report;
+
+       id->parent = pid;
+       report = verbose || id->ilist || (pid != NULL && pid->ilist);
+       if (report)
+               dbprintf("inode %lld parent %lld\n", id->ino, parent);
+}
+
+/*
+ * Extend the entry at *entp by c blocks numbered b, b+1, ..., b+c-1.
+ * The entry is reallocated, so *entp is updated to the new address.
+ */
+static void
+blkent_append(
+       blkent_t        **entp,
+       xfs_fsblock_t   b,
+       xfs_extlen_t    c)
+{
+       blkent_t        *grown;
+       int             k;
+       int             oldcnt;
+
+       grown = xrealloc(*entp, BLKENT_SIZE(c + (*entp)->nblks));
+       *entp = grown;
+       oldcnt = grown->nblks;
+       for (k = 0; k < c; k++)
+               grown->blks[oldcnt + k] = b + k;
+       grown->nblks = oldcnt + c;
+}
+
+/*
+ * Allocate an entry covering c file offsets starting at o, mapped to
+ * the consecutive filesystem blocks b, b+1, ..., b+c-1.
+ */
+static blkent_t *
+blkent_new(
+       xfs_fileoff_t   o,
+       xfs_fsblock_t   b,
+       xfs_extlen_t    c)
+{
+       blkent_t        *fresh = xmalloc(BLKENT_SIZE(c));
+       int             k;
+
+       fresh->startoff = o;
+       fresh->nblks = c;
+       for (k = 0; k < c; k++)
+               fresh->blks[k] = b + k;
+       return fresh;
+}
+
+/*
+ * Extend the entry at *entp downward by c blocks.
+ *
+ * A replacement entry is allocated whose startoff is c lower than the
+ * old one; its first c slots are filled with the filesystem blocks
+ * b, b+1, ..., b+c-1 and the old entry's blocks follow.  The old entry
+ * is freed and *entp is pointed at the replacement.
+ */
+static void
+blkent_prepend(
+       blkent_t        **entp,
+       xfs_fsblock_t   b,
+       xfs_extlen_t    c)
+{
+       int             i;
+       blkent_t        *newent;
+       blkent_t        *oldent;
+
+       oldent = *entp;
+       newent = xmalloc(BLKENT_SIZE(oldent->nblks + c));
+       newent->nblks = oldent->nblks + c;
+       newent->startoff = oldent->startoff - c;
+       /*
+        * Slot i holds block b + i, matching blkent_new() and
+        * blkent_append().  (This used to store b + c in every slot, so a
+        * one-block prepend recorded block b + 1 instead of b.)
+        */
+       for (i = 0; i < c; i++)
+               newent->blks[i] = b + i;
+       for (; i < oldent->nblks + c; i++)
+               newent->blks[i] = oldent->blks[i - c];
+       xfree(oldent);
+       *entp = newent;
+}
+
+/*
+ * Allocate an empty block map with room for nex entries; at least one
+ * slot is always allocated.
+ */
+static blkmap_t *
+blkmap_alloc(
+       xfs_extnum_t    nex)
+{
+       blkmap_t        *map;
+       xfs_extnum_t    slots = nex < 1 ? 1 : nex;
+
+       map = xmalloc(BLKMAP_SIZE(slots));
+       map->nents = 0;
+       map->naents = slots;
+       return map;
+}
+
+/*
+ * Release a block map together with every entry it currently holds.
+ */
+static void
+blkmap_free(
+       blkmap_t        *blkmap)
+{
+       xfs_extnum_t    n;
+
+       for (n = 0; n < blkmap->nents; n++)
+               xfree(blkmap->ents[n]);
+       xfree(blkmap);
+}
+
+/*
+ * Look up the filesystem block backing file offset o.  Returns
+ * NULLFSBLOCK when no entry covers that offset.
+ */
+static xfs_fsblock_t
+blkmap_get(
+       blkmap_t        *blkmap,
+       xfs_fileoff_t   o)
+{
+       int             n;
+
+       for (n = 0; n < blkmap->nents; n++) {
+               blkent_t        *ent = blkmap->ents[n];
+
+               if (o < ent->startoff)
+                       continue;
+               if (o < ent->startoff + ent->nblks)
+                       return ent->blks[o - ent->startoff];
+       }
+       return NULLFSBLOCK;
+}
+
+/*
+ * Collect the mappings for the nb file offsets starting at o.
+ *
+ * Builds an array of bmap_ext_t extents, one per maximal run in which
+ * both the file offsets and the filesystem blocks are consecutive, and
+ * stores it in *bmpp.  Returns the number of extents; when none are
+ * found *bmpp is NULL and the return value is 0.  The array is built
+ * with realloc(), so the caller presumably owns and frees it (confirm
+ * at the call sites).
+ *
+ * NOTE(review): the realloc() result is used unchecked, unlike the
+ * xrealloc() calls elsewhere in this file.
+ */
+static int
+blkmap_getn(
+       blkmap_t        *blkmap,
+       xfs_fileoff_t   o,
+       int             nb,
+       bmap_ext_t      **bmpp)
+{
+       bmap_ext_t      *bmp;
+       blkent_t        *ent;
+       xfs_fileoff_t   ento;
+       blkent_t        **entp;
+       int             i;
+       int             nex;
+
+       for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+            i < blkmap->nents;
+            i++, entp++) {
+               ent = *entp;
+               /* entry starts at or past the end of the range: stop */
+               if (ent->startoff >= o + nb)
+                       break;
+               /* entry ends before the range begins: skip it */
+               if (ent->startoff + ent->nblks <= o)
+                       continue;
+               for (ento = ent->startoff;
+                    ento < ent->startoff + ent->nblks && ento < o + nb;
+                    ento++) {
+                       if (ento < o)
+                               continue;
+                       /*
+                        * Grow the previous extent when this block follows
+                        * it both in the file and on disk; otherwise start
+                        * a new one-block extent.
+                        */
+                       if (bmp &&
+                           bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+                                   ento &&
+                           bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+                                   ent->blks[ento - ent->startoff])
+                               bmp[nex - 1].blockcount++;
+                       else {
+                               bmp = realloc(bmp, ++nex * sizeof(*bmp));
+                               bmp[nex - 1].startoff = ento;
+                               bmp[nex - 1].startblock =
+                                       ent->blks[ento - ent->startoff];
+                               bmp[nex - 1].blockcount = 1;
+                               bmp[nex - 1].flag = 0;
+                       }
+               }
+       }
+       *bmpp = bmp;
+       return nex;
+}
+
+/*
+ * Insert newent into the map at the slot entp points to, moving later
+ * entries up by one.  When the map is full it is reallocated with one
+ * more slot and *blkmapp is updated; entp is converted to an index
+ * before that happens.
+ */
+static void
+blkmap_grow(
+       blkmap_t        **blkmapp,
+       blkent_t        **entp,
+       blkent_t        *newent)
+{
+       blkmap_t        *map = *blkmapp;
+       int             at = (int)(entp - map->ents);
+       int             n;
+
+       if (map->nents == map->naents) {
+               map = xrealloc(map, BLKMAP_SIZE(map->nents + 1));
+               map->naents++;
+               *blkmapp = map;
+       }
+       n = map->nents;
+       while (n > at) {
+               map->ents[n] = map->ents[n - 1];
+               n--;
+       }
+       map->ents[at] = newent;
+       map->nents++;
+}
+
+/*
+ * File offset one past the end of the final entry, or NULLFILEOFF for
+ * an empty map.
+ */
+static xfs_fileoff_t
+blkmap_last_off(
+       blkmap_t        *blkmap)
+{
+       blkent_t        *last;
+
+       if (blkmap->nents == 0)
+               return NULLFILEOFF;
+       last = blkmap->ents[blkmap->nents - 1];
+       return last->startoff + last->nblks;
+}
+
+/*
+ * Step through the mapped file offsets of a block map.
+ *
+ * Pass o == NULLFILEOFF to begin: *t is reset to 0 and the first
+ * entry's startoff is returned.  Afterwards pass the previous result as
+ * o; *t tracks the index of the entry being walked.  Returns
+ * NULLFILEOFF when the map is empty or the walk is finished.
+ */
+static xfs_fileoff_t
+blkmap_next_off(
+       blkmap_t        *blkmap,
+       xfs_fileoff_t   o,
+       int             *t)
+{
+       int             cur;
+       blkent_t        *ent;
+
+       if (blkmap->nents == 0)
+               return NULLFILEOFF;
+       if (o == NULLFILEOFF) {
+               *t = 0;
+               return blkmap->ents[0]->startoff;
+       }
+       cur = *t;
+       ent = blkmap->ents[cur];
+       /* still inside the current entry */
+       if (o < ent->startoff + ent->nblks - 1)
+               return o + 1;
+       /* current entry exhausted: move to the next one, if any */
+       if (cur + 1 >= blkmap->nents)
+               return NULLFILEOFF;
+       *t = cur + 1;
+       return blkmap->ents[cur + 1]->startoff;
+}
+
+/*
+ * Record that file offset o maps to filesystem block b.
+ *
+ * The entries are walked in order and the first applicable case wins:
+ *  - o lies more than one block before an entry: a new one-block entry
+ *    is inserted ahead of it;
+ *  - o is the offset just before an entry: the entry is extended
+ *    downward;
+ *  - o is already covered by an entry: its block is overwritten;
+ *  - o is the offset just past an entry: the entry is extended upward
+ *    and, if it now reaches the following entry, that entry is folded
+ *    into it.
+ * If nothing applies, a new one-block entry is added at the end.
+ * blkmap_grow() may reallocate the map and update *blkmapp.
+ */
+static void
+blkmap_set_blk(
+       blkmap_t        **blkmapp,
+       xfs_fileoff_t   o,
+       xfs_fsblock_t   b)
+{
+       blkmap_t        *blkmap;
+       blkent_t        *ent;
+       blkent_t        **entp;
+       int             i;
+       blkent_t        *nextent;
+
+       blkmap = *blkmapp;
+       for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+               ent = *entp;
+               if (o < ent->startoff - 1) {
+                       ent = blkent_new(o, b, 1);
+                       blkmap_grow(blkmapp, entp, ent);
+                       return;
+               }
+               if (o == ent->startoff - 1) {
+                       blkent_prepend(entp, b, 1);
+                       return;
+               }
+               if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+                       ent->blks[o - ent->startoff] = b;
+                       return;
+               }
+               if (o > ent->startoff + ent->nblks)
+                       continue;
+               /* o is exactly one past this entry */
+               blkent_append(entp, b, 1);
+               if (entp == &blkmap->ents[blkmap->nents - 1])
+                       return;
+               ent = *entp;
+               nextent = entp[1];
+               if (ent->startoff + ent->nblks < nextent->startoff)
+                       return;
+               /*
+                * The two entries now touch.  Copy the neighbour's blocks
+                * across one at a time: an entry's blocks need not be
+                * consecutive on disk, so they cannot be regenerated from
+                * blks[0] alone.  nextent stays valid until blkmap_shrink()
+                * frees it.
+                */
+               for (i = 0; i < nextent->nblks; i++)
+                       blkent_append(entp, nextent->blks[i], 1);
+               blkmap_shrink(blkmap, &entp[1]);
+               return;
+       }
+       ent = blkent_new(o, b, 1);
+       blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Record that the c file offsets starting at o map to the filesystem
+ * blocks starting at b.
+ *
+ * Fast paths: an empty map gets its first entry; an extent that begins
+ * exactly where the last entry ends is appended to it; an extent that
+ * begins beyond the last entry becomes a new final entry.  Anything
+ * else is fed block by block to blkmap_set_blk().
+ */
+static void
+blkmap_set_ext(
+       blkmap_t        **blkmapp,
+       xfs_fileoff_t   o,
+       xfs_fsblock_t   b,
+       xfs_extlen_t    c)
+{
+       blkmap_t        *map = *blkmapp;
+       blkent_t        **lastp;
+       blkent_t        *fresh;
+       xfs_fileoff_t   end;
+       xfs_extnum_t    n;
+
+       if (map->nents == 0) {
+               map->ents[0] = blkent_new(o, b, c);
+               map->nents = 1;
+               return;
+       }
+       lastp = &map->ents[map->nents - 1];
+       end = (*lastp)->startoff + (*lastp)->nblks;
+       if (end == o) {
+               blkent_append(lastp, b, c);
+               return;
+       }
+       if (end < o) {
+               fresh = blkent_new(o, b, c);
+               blkmap_grow(blkmapp, &map->ents[map->nents], fresh);
+               return;
+       }
+       for (n = 0; n < c; n++)
+               blkmap_set_blk(blkmapp, o + n, b + n);
+}
+
+/*
+ * Remove the entry entp points to from the map: the entry is freed and
+ * every later entry moves down one slot.  The map itself is not
+ * reallocated, so naents is unchanged.
+ */
+static void
+blkmap_shrink(
+       blkmap_t        *blkmap,
+       blkent_t        **entp)
+{
+       int             i;
+       int             idx;
+
+       xfree(*entp);
+       idx = (int)(entp - blkmap->ents);
+       /*
+        * Close the gap by pulling each later entry down one slot.  (The
+        * copy used to run the other way, ents[i] = ents[i - 1], which
+        * spread the freed pointer over the tail and lost the live
+        * entries.)
+        */
+       for (i = idx + 1; i < blkmap->nents; i++)
+               blkmap->ents[i - 1] = blkmap->ents[i];
+       blkmap->nents--;
+}
+
+/*
+ * "blockfree" command: discard the block usage information.
+ *
+ * Frees the per-AG dbmap and inomap rows and the inode records, then,
+ * when the superblock reports realtime extents, the extra row stored
+ * after the last AG plus the two summary buffers.  Finally the top-level
+ * tables are freed and their pointers cleared.  Complains and does
+ * nothing when no information is held.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+blockfree_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agnumber_t  agno;
+       int             have_rt;
+
+       if (dbmap == NULL) {
+               dbprintf("block usage information not allocated\n");
+               return 0;
+       }
+       have_rt = (mp->m_sb.sb_rextents != 0);
+       agno = 0;
+       while (agno < mp->m_sb.sb_agcount) {
+               xfree(dbmap[agno]);
+               xfree(inomap[agno]);
+               free_inodata(agno);
+               agno++;
+       }
+       if (have_rt) {
+               /* agno == sb_agcount here: the row after the last AG */
+               xfree(dbmap[agno]);
+               xfree(inomap[agno]);
+               xfree(sumcompute);
+               xfree(sumfile);
+               sumfile = NULL;
+               sumcompute = NULL;
+       }
+       xfree(dbmap);
+       xfree(inomap);
+       xfree(inodata);
+       inodata = NULL;
+       inomap = NULL;
+       dbmap = NULL;
+       return 0;
+}
+
+/*
+ * Check consistency of xfs filesystem contents.
+ *
+ * blockget command handler.  Refuses to run if block usage information is
+ * already present (dbmap != NULL).  Otherwise init() parses argv and
+ * allocates the maps, every allocation group is scanned, and the results
+ * are cross-checked against the superblock counters and version bits.
+ *
+ * The outcome is reported through the global exitcode: 2 if the scan set
+ * serious_error, 3 if any check bumped the error counter.  The function
+ * itself always returns 0.
+ */
+static int
+blockget_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agnumber_t  agno;
+       int             oldprefix;
+       int             sbyell;
+
+       if (dbmap) {
+               dbprintf("already have block usage information\n");
+               return 0;
+       }
+       if (!init(argc, argv))
+               return 0;
+       /* dbprefix is restored from oldprefix on every exit path below */
+       oldprefix = dbprefix;
+       dbprefix |= pflag;
+       /* sbyell makes sure the in-loop warning is printed at most once */
+       for (agno = 0, sbyell = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               scan_ag(agno);
+               if (sbver_err > 4 && !sbyell && sbver_err >= agno) {
+                       sbyell = 1;
+                       dbprintf("WARNING: this may be a newer XFS "
+                                "filesystem.\n");
+               }
+       }
+       /* the block list is only needed while scanning; drop it now */
+       if (blist_size) {
+               xfree(blist);
+               blist = NULL;
+               blist_size = 0;
+       }
+       /* skip the cross-checks below when the scan hit a serious error */
+       if (serious_error) {
+               exitcode = 2;
+               dbprefix = oldprefix;
+               return 0;
+       }
+       check_rootdir();
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               /*
+                * Check that there are no blocks either
+                * a) unaccounted for or 
+                * b) bno-free but not cnt-free
+                */
+               checknot_dbmap(agno, 0, mp->m_sb.sb_agblocks,
+                       (1 << DBM_UNKNOWN) | (1 << DBM_FREE1));
+               check_linkcounts(agno);
+       }
+       /* realtime area: no block may be left unknown; verify the summary */
+       if (mp->m_sb.sb_rblocks) {
+               checknot_rdbmap(0,
+                       (xfs_extlen_t)(mp->m_sb.sb_rextents *
+                                      mp->m_sb.sb_rextsize),
+                       1 << DBM_UNKNOWN);
+               check_summary();
+       }
+       /* compare the superblock counters with the totals counted here */
+       if (mp->m_sb.sb_icount != icount) {
+               if (!sflag)
+                       dbprintf("sb_icount %lld, counted %lld\n",
+                               mp->m_sb.sb_icount, icount);
+               error++;
+       }
+       if (mp->m_sb.sb_ifree != ifree) {
+               if (!sflag)
+                       dbprintf("sb_ifree %lld, counted %lld\n",
+                               mp->m_sb.sb_ifree, ifree);
+               error++;
+       }
+       if (mp->m_sb.sb_fdblocks != fdblocks) {
+               if (!sflag)
+                       dbprintf("sb_fdblocks %lld, counted %lld\n",
+                               mp->m_sb.sb_fdblocks, fdblocks);
+               error++;
+       }
+       if (mp->m_sb.sb_frextents != frextents) {
+               if (!sflag)
+                       dbprintf("sb_frextents %lld, counted %lld\n",
+                               mp->m_sb.sb_frextents, frextents);
+               error++;
+       }
+       /*
+        * sbversion holds the feature bits accumulated for this run.  A bit
+        * set there but absent from the superblock is reported as missing;
+        * the align bit is checked the other way round (set in the
+        * superblock but not in sbversion).
+        */
+       if ((sbversion & XFS_SB_VERSION_ATTRBIT) &&
+           !XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
+               if (!sflag)
+                       dbprintf("sb versionnum missing attr bit %x\n",
+                               XFS_SB_VERSION_ATTRBIT);
+               error++;
+       }
+       if ((sbversion & XFS_SB_VERSION_NLINKBIT) &&
+           !XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+               if (!sflag)
+                       dbprintf("sb versionnum missing nlink bit %x\n",
+                               XFS_SB_VERSION_NLINKBIT);
+               error++;
+       }
+       if ((sbversion & XFS_SB_VERSION_QUOTABIT) &&
+           !XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+               if (!sflag)
+                       dbprintf("sb versionnum missing quota bit %x\n",
+                               XFS_SB_VERSION_QUOTABIT);
+               error++;
+       }
+       if (!(sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+           XFS_SB_VERSION_HASALIGN(&mp->m_sb)) {
+               if (!sflag)
+                       dbprintf("sb versionnum extra align bit %x\n",
+                               XFS_SB_VERSION_ALIGNBIT);
+               error++;
+       }
+       if (qudo)
+               quota_check("user", qudata);
+       if (qpdo)
+               quota_check("project", qpdata);
+       /* repeat the warning if more than half the AGs had version errors */
+       if (sbver_err > mp->m_sb.sb_agcount / 2)
+               dbprintf("WARNING: this may be a newer XFS filesystem.\n");
+       if (error)
+               exitcode = 3;
+       dbprefix = oldprefix;
+       return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Inclusive range of bit-run lengths.  blocktrash_b() picks a random
+ * length between min and max when it corrupts a block.
+ */
+typedef struct ltab {
+       int     min;    /* shortest run length, in bits */
+       int     max;    /* longest run length, in bits */
+} ltab_t;
+
+/*
+ * Corrupt a run of bits in filesystem block agno/agbno and write it back.
+ *
+ * The run length is chosen at random in [ltabp->min, ltabp->max] and the
+ * starting bit at random within the block; a run that reaches the end of
+ * the block wraps around to its start.  mode selects what happens to each
+ * bit: 0 clears it, 1 sets it, 2 inverts it, 3 sets it to a random value.
+ * type is only used to name the block's type in the report line printed
+ * at the end.
+ */
+static void
+blocktrash_b(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       dbm_t           type,
+       ltab_t          *ltabp,
+       int             mode)
+{
+       int             bit;
+       int             bitno;
+       char            *buf;
+       int             byte;
+       int             len;
+       int             mask;
+       int             newbit;
+       int             offset;
+       /* indexed by mode; used only in the report line */
+       static char     *modestr[] = {
+               "zeroed", "set", "flipped", "randomized"
+       };
+
+       len = (int)((random() % (ltabp->max - ltabp->min + 1)) + ltabp->min);
+       offset = (int)(random() % (int)(mp->m_sb.sb_blocksize * NBBY));
+       newbit = 0;
+       /* read the block on a pushed cursor; popped again before returning */
+       push_cur();
+       set_cur(&typtab[DBM_UNKNOWN],
+               XFS_AGB_TO_DADDR(mp, agno, agbno), blkbb, DB_RING_IGN, NULL);
+       if ((buf = iocur_top->data) == NULL) {
+               dbprintf("can't read block %u/%u for trashing\n", agno, agbno);
+               pop_cur();
+               return;
+       }
+       for (bitno = 0; bitno < len; bitno++) {
+               /* absolute bit position, wrapped to the block size */
+               bit = (offset + bitno) % (mp->m_sb.sb_blocksize * NBBY);
+               byte = bit / NBBY;
+               bit %= NBBY;
+               mask = 1 << bit;
+               switch (mode) {
+               case 0:
+                       newbit = 0;
+                       break;
+               case 1:
+                       newbit = 1;
+                       break;
+               case 2:
+                       /* invert: new value is 1 only if the bit was 0 */
+                       newbit = (buf[byte] & mask) == 0;
+                       break;
+               case 3:
+                       newbit = (int)random() & 1;
+                       break;
+               }
+               if (newbit)
+                       buf[byte] |= mask;
+               else
+                       buf[byte] &= ~mask;
+       }
+       write_cur();
+       pop_cur();
+       printf("blocktrash: %u/%u %s block %d bit%s starting %d:%d %s\n",
+               agno, agbno, typename[type], len, len == 1 ? "" : "s",
+               offset / NBBY, offset % NBBY, modestr[mode]);
+}
+
+/*
+ * blocktrash command: corrupt randomly chosen blocks of the selected types.
+ * Requires block usage information from a previous blockget.
+ *
+ * Options:
+ *   -0 -1 -2 -3  bit operation passed to blocktrash_b() (default 2)
+ *   -n count     number of blocks to trash (default 1)
+ *   -s seed      seed for srandom(); otherwise derived from the time of
+ *                day and printed
+ *   -t type      restrict to a block type; may be repeated
+ *   -x min       minimum run length in bits (default 1)
+ *   -y max       maximum run length in bits (default 128 * NBBY)
+ *
+ * Always returns 0; problems are reported with dbprintf().
+ */
+int
+blocktrash_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agblock_t   agbno;
+       xfs_agnumber_t  agno;
+       xfs_drfsbno_t   bi;
+       xfs_drfsbno_t   blocks;
+       int             c;
+       int             count;
+       int             done;
+       int             goodmask;
+       int             i;
+       ltab_t          *lentab;
+       int             lentablen;
+       int             max;
+       int             min;
+       int             mode;
+       struct timeval  now;
+       char            *p;
+       xfs_drfsbno_t   randb;
+       uint            seed;
+       int             sopt;
+       int             tmask;
+
+       if (!dbmap) {
+               dbprintf("must run blockget first\n");
+               return 0;
+       }
+       optind = 0;
+       count = 1;
+       min = 1;
+       max = 128 * NBBY;
+       mode = 2;
+       gettimeofday(&now, NULL);
+       seed = (unsigned int)(now.tv_sec ^ now.tv_usec);
+       sopt = 0;
+       tmask = 0;
+       /* block types that -t accepts; also the default when -t is absent */
+       goodmask = (1 << DBM_AGF) |
+                  (1 << DBM_AGFL) |
+                  (1 << DBM_AGI) |
+                  (1 << DBM_ATTR) |
+                  (1 << DBM_BTBMAPA) |
+                  (1 << DBM_BTBMAPD) |
+                  (1 << DBM_BTBNO) |
+                  (1 << DBM_BTCNT) |
+                  (1 << DBM_BTINO) |
+                  (1 << DBM_DIR) |
+                  (1 << DBM_INODE) |
+                  (1 << DBM_QUOTA) |
+                  (1 << DBM_RTBITMAP) |
+                  (1 << DBM_RTSUM) |
+                  (1 << DBM_SB);
+       while ((c = getopt(argc, argv, "0123n:s:t:x:y:")) != EOF) {
+               switch (c) {
+               case '0':
+                       mode = 0;
+                       break;
+               case '1':
+                       mode = 1;
+                       break;
+               case '2':
+                       mode = 2;
+                       break;
+               case '3':
+                       mode = 3;
+                       break;
+               case 'n':
+                       count = (int)strtol(optarg, &p, 0);
+                       if (*p != '\0' || count <= 0) {
+                               dbprintf("bad blocktrash count %s\n", optarg);
+                               return 0;
+                       }
+                       break;
+               case 's':
+                       seed = (uint)strtoul(optarg, &p, 0);
+                       sopt = 1;
+                       break;
+               case 't':
+                       for (i = 0; typename[i]; i++) {
+                               if (strcmp(typename[i], optarg) == 0)
+                                       break;
+                       }
+                       if (!typename[i] || (((1 << i) & goodmask) == 0)) {
+                               dbprintf("bad blocktrash type %s\n", optarg);
+                               return 0;
+                       }
+                       tmask |= 1 << i;
+                       break;
+               case 'x':
+                       min = (int)strtol(optarg, &p, 0);
+                       if (*p != '\0' || min <= 0 ||
+                           min > mp->m_sb.sb_blocksize * NBBY) {
+                               dbprintf("bad blocktrash min %s\n", optarg);
+                               return 0;
+                       }
+                       break;
+               case 'y':
+                       max = (int)strtol(optarg, &p, 0);
+                       if (*p != '\0' || max <= 0 ||
+                           max > mp->m_sb.sb_blocksize * NBBY) {
+                               dbprintf("bad blocktrash max %s\n", optarg);
+                               return 0;
+                       }
+                       break;
+               default:
+                       dbprintf("bad option for blocktrash command\n");
+                       return 0;
+               }
+       }
+       if (min > max) {
+               dbprintf("bad min/max for blocktrash command\n");
+               return 0;
+       }
+       if (tmask == 0)
+               tmask = goodmask;
+       /*
+        * Split [min, max] into ranges, starting a new range at every power
+        * of two.  One range is later chosen at random, then a length
+        * within that range.
+        */
+       lentab = xmalloc(sizeof(ltab_t));
+       lentab->min = lentab->max = min;
+       lentablen = 1;
+       for (i = min + 1; i <= max; i++) {
+               if ((i & (i - 1)) == 0) {
+                       lentab = xrealloc(lentab,
+                               sizeof(ltab_t) * (lentablen + 1));
+                       lentab[lentablen].min = lentab[lentablen].max = i;
+                       lentablen++;
+               } else
+                       lentab[lentablen - 1].max = i;
+       }
+       /* count the candidate blocks, i.e. those whose type is in tmask */
+       for (blocks = 0, agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               for (agbno = 0, p = dbmap[agno];
+                    agbno < mp->m_sb.sb_agblocks;
+                    agbno++, p++) {
+                       if ((1 << *p) & tmask)
+                               blocks++;
+               }
+       }
+       if (blocks == 0) {
+               dbprintf("blocktrash: no matching blocks\n");
+               /* release the length table on this early exit too */
+               xfree(lentab);
+               return 0;
+       }
+       if (!sopt)
+               dbprintf("blocktrash: seed %u\n", seed);
+       srandom(seed);
+       for (i = 0; i < count; i++) {
+               /* index of the victim among the candidate blocks */
+               randb = (xfs_drfsbno_t)((((__int64_t)random() << 32) |
+                                        random()) % blocks);
+               /* walk the maps again until the randb'th candidate is hit */
+               for (bi = 0, agno = 0, done = 0;
+                    !done && agno < mp->m_sb.sb_agcount;
+                    agno++) {
+                       for (agbno = 0, p = dbmap[agno];
+                            agbno < mp->m_sb.sb_agblocks;
+                            agbno++, p++) {
+                               if (!((1 << *p) & tmask))
+                                       continue;
+                               if (bi++ < randb)
+                                       continue;
+                               blocktrash_b(agno, agbno, (dbm_t)*p,
+                                       &lentab[random() % lentablen], mode);
+                               done = 1;
+                               break;
+                       }
+               }
+       }
+       xfree(lentab);
+       return 0;
+}
+#endif
+
+/*
+ * blockuse command: print the recorded type, and owning inode if any, of
+ * the block at the current position.  With "-c count" that many
+ * consecutive blocks are printed; the run must stay inside the allocation
+ * group.  With "-n" (only after "blockget -n") the inode's name is printed
+ * too.  Always returns 0.
+ */
+int
+blockuse_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agblock_t   agbno;
+       xfs_agnumber_t  agno;
+       int             count;
+       char            *endp;
+       xfs_fsblock_t   fsb;
+       xfs_agblock_t   last;
+       char            *name;
+       int             opt;
+       inodata_t       *owner;
+       int             shownames;
+
+       if (dbmap == NULL) {
+               dbprintf("must run blockget first\n");
+               return 0;
+       }
+       optind = 0;
+       count = 1;
+       shownames = 0;
+       fsb = XFS_DADDR_TO_FSB(mp, iocur_top->off >> BBSHIFT);
+       agno = XFS_FSB_TO_AGNO(mp, fsb);
+       agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+       last = agbno;
+       while ((opt = getopt(argc, argv, "c:n")) != EOF) {
+               if (opt == 'c') {
+                       count = (int)strtol(optarg, &endp, 0);
+                       last = agbno + count - 1;
+                       if (*endp != '\0' || count <= 0 ||
+                           last >= mp->m_sb.sb_agblocks) {
+                               dbprintf("bad blockuse count %s\n", optarg);
+                               return 0;
+                       }
+               } else if (opt == 'n') {
+                       if (!nflag) {
+                               dbprintf("must run blockget -n first\n");
+                               return 0;
+                       }
+                       shownames = 1;
+               } else {
+                       dbprintf("bad option for blockuse command\n");
+                       return 0;
+               }
+       }
+       for (; agbno <= last; agbno++) {
+               owner = inomap[agno][agbno];
+               dbprintf("block %llu (%u/%u) type %s",
+                       (xfs_dfsbno_t)XFS_AGB_TO_FSB(mp, agno, agbno),
+                       agno, agbno, typename[(dbm_t)dbmap[agno][agbno]]);
+               if (owner != NULL) {
+                       dbprintf(" inode %lld", owner->ino);
+                       if (shownames) {
+                               name = inode_name(owner->ino, NULL);
+                               if (name != NULL) {
+                                       dbprintf(" %s", name);
+                                       xfree(name);
+                               }
+                       }
+               }
+               dbprintf("\n");
+       }
+       return 0;
+}
+
+/*
+ * Search the blist[] array (blist_size entries) for bno.
+ * Returns 1 if it is present, 0 otherwise.
+ */
+static int
+check_blist(
+       xfs_fsblock_t   bno)
+{
+       int             found = 0;
+       int             n;
+
+       for (n = 0; !found && n < blist_size; n++)
+               found = (blist[n] == bno);
+       return found;
+}
+
+/*
+ * Verify that each of the len blocks starting at agno/agbno is recorded in
+ * dbmap with the given type.  Every mismatch bumps the error counter and,
+ * unless silenced by sflag (or the block is on the block list), is
+ * reported.
+ */
+static void
+check_dbmap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       dbm_t           type)
+{
+       char            *base;
+       dbm_t           found;
+       xfs_extlen_t    off;
+
+       base = &dbmap[agno][agbno];
+       for (off = 0; off < len; off++) {
+               found = (dbm_t)base[off];
+               if (found == type)
+                       continue;
+               if (!sflag || CHECK_BLISTA(agno, agbno + off))
+                       dbprintf("block %u/%u expected type %s got %s\n",
+                               agno, agbno + off, typename[type],
+                               typename[found]);
+               error++;
+       }
+}
+
+/*
+ * Register the commands implemented in this file with add_command():
+ * blockfree, blockget, blockuse and ncheck, plus blocktrash when built
+ * with DEBUG defined.
+ */
+void
+check_init(void)
+{
+       add_command(&blockfree_cmd);
+       add_command(&blockget_cmd);
+#ifdef DEBUG
+       add_command(&blocktrash_cmd);
+#endif
+       add_command(&blockuse_cmd);
+       add_command(&ncheck_cmd);
+}
+
+/*
+ * Check that none of the len blocks starting at agno/agbno already has an
+ * owning inode recorded in inomap.  c_ino is the inode now claiming them
+ * and is used only in messages.
+ *
+ * Returns 1 if the range is valid and entirely unowned, 0 otherwise.
+ * Every block that is already owned bumps the error counter.
+ */
+static int
+check_inomap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       xfs_ino_t       c_ino)
+{
+       xfs_extlen_t    off;
+       int             ok;
+       inodata_t       *owner;
+       inodata_t       **slot;
+
+       if (!check_range(agno, agbno, len))  {
+               dbprintf("blocks %u/%u..%u claimed by inode %lld\n",
+                       agno, agbno, agbno + len - 1, c_ino);
+               return 0;
+       }
+       ok = 1;
+       slot = &inomap[agno][agbno];
+       for (off = 0; off < len; off++) {
+               owner = slot[off];
+               if (owner == NULL)
+                       continue;
+               if (!sflag || owner->ilist || CHECK_BLISTA(agno, agbno + off))
+                       dbprintf("block %u/%u claimed by inode %lld, "
+                                "previous inum %lld\n",
+                               agno, agbno + off, c_ino, owner->ino);
+               error++;
+               ok = 0;
+       }
+       return ok;
+}
+
+/*
+ * Walk every inode recorded for allocation group agno and compare its
+ * link_set and link_add counts.  An inode whose counts differ, or whose
+ * link_set is zero, bumps the error counter and is reported unless
+ * silenced.  Consistent inodes are listed by name only when verbose is set
+ * or the inode is flagged via ilist.
+ */
+static void
+check_linkcounts(
+       xfs_agnumber_t  agno)
+{
+       int             b;
+       inodata_t       **bucket;
+       inodata_t       *ip;
+       char            *name;
+
+       bucket = inodata[agno];
+       for (b = 0; b < inodata_hash_size; b++) {
+               for (ip = bucket[b]; ip; ip = ip->next) {
+                       if (ip->link_set == ip->link_add &&
+                           ip->link_set != 0) {
+                               /* counts agree; just list it if asked to */
+                               if (!verbose && !ip->ilist)
+                                       continue;
+                               name = inode_name(ip->ino, NULL);
+                               if (name) {
+                                       dbprintf("inode %lld name %s\n",
+                                               ip->ino, name);
+                                       xfree(name);
+                               }
+                               continue;
+                       }
+                       name = inode_name(ip->ino, NULL);
+                       /* the mismatch message needs a printable name */
+                       if (!name && ip->link_add)
+                               name = xstrdup("?");
+                       if (!sflag || ip->ilist) {
+                               if (ip->link_add)
+                                       dbprintf("link count mismatch "
+                                                "for inode %lld (name "
+                                                "%s), nlink %d, "
+                                                "counted %d\n",
+                                               ip->ino, name,
+                                               ip->link_set,
+                                               ip->link_add);
+                               else if (ip->link_set)
+                                       dbprintf("disconnected inode "
+                                                "%lld, nlink %d\n",
+                                               ip->ino, ip->link_set);
+                               else
+                                       dbprintf("allocated inode %lld "
+                                                "has 0 link count\n",
+                                               ip->ino);
+                       }
+                       if (name)
+                               xfree(name);
+                       error++;
+               }
+       }
+}
+
+/*
+ * Validate that agno is an existing allocation group and that the last
+ * block of the range (agbno + len - 1) lies inside it.
+ *
+ * Returns 1 if the range is valid.  Otherwise each block of the range is
+ * reported (subject to sflag and the block list), the error counter is
+ * bumped once, and 0 is returned.
+ */
+static int
+check_range(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len)
+{
+       xfs_extlen_t    off;
+
+       if (agno < mp->m_sb.sb_agcount &&
+           agbno + len - 1 < mp->m_sb.sb_agblocks)
+               return 1;
+
+       for (off = 0; off < len; off++) {
+               if (!sflag || CHECK_BLISTA(agno, agbno + off))
+                       dbprintf("block %u/%u out of range\n",
+                               agno, agbno + off);
+       }
+       error++;
+       return 0;
+}
+
+/*
+ * Realtime counterpart of check_dbmap(): verify that each of the len
+ * realtime blocks starting at bno is recorded with the given type.  The
+ * realtime map is the dbmap slot at index sb_agcount.  Every mismatch
+ * bumps the error counter and is reported unless silenced.
+ */
+static void
+check_rdbmap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       dbm_t           type)
+{
+       char            *base;
+       dbm_t           found;
+       xfs_extlen_t    off;
+
+       base = &dbmap[mp->m_sb.sb_agcount][bno];
+       for (off = 0; off < len; off++) {
+               found = (dbm_t)base[off];
+               if (found == type)
+                       continue;
+               if (!sflag || CHECK_BLIST(bno + off))
+                       dbprintf("rtblock %llu expected type %s got %s\n",
+                               bno + off, typename[type], typename[found]);
+               error++;
+       }
+}
+
+/*
+ * Realtime counterpart of check_inomap(): check that none of the len
+ * realtime blocks starting at bno already has an owning inode recorded.
+ * c_ino is the inode now claiming them and is used only in messages.
+ *
+ * Returns 1 if the range is valid and entirely unowned, 0 otherwise.
+ * Every block that is already owned bumps the error counter.
+ */
+static int
+check_rinomap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       xfs_ino_t       c_ino)
+{
+       xfs_extlen_t    off;
+       int             ok;
+       inodata_t       *owner;
+       inodata_t       **slot;
+
+       if (!check_rrange(bno, len)) {
+               dbprintf("rtblocks %llu..%llu claimed by inode %lld\n",
+                       bno, bno + len - 1, c_ino);
+               return 0;
+       }
+       ok = 1;
+       slot = &inomap[mp->m_sb.sb_agcount][bno];
+       for (off = 0; off < len; off++) {
+               owner = slot[off];
+               if (owner == NULL)
+                       continue;
+               if (!sflag || owner->ilist || CHECK_BLIST(bno + off))
+                       dbprintf("rtblock %llu claimed by inode %lld, "
+                                "previous inum %lld\n",
+                               bno + off, c_ino, owner->ino);
+               error++;
+               ok = 0;
+       }
+       return ok;
+}
+
+/*
+ * Check that the superblock's root inode was found during the scan and is
+ * a directory.  Either failure bumps the error counter and is reported
+ * unless silenced.
+ */
+static void
+check_rootdir(void)
+{
+       inodata_t       *root;
+
+       root = find_inode(mp->m_sb.sb_rootino, 0);
+       if (root != NULL && root->isdir)
+               return;
+
+       if (root == NULL) {
+               if (!sflag)
+                       dbprintf("root inode %lld is missing\n",
+                               mp->m_sb.sb_rootino);
+       } else {
+               if (!sflag || root->ilist)
+                       dbprintf("root inode %lld is not a directory\n",
+                               mp->m_sb.sb_rootino);
+       }
+       error++;
+}
+
+/*
+ * Validate that the last block of the realtime range (bno + len - 1) lies
+ * below sb_rblocks.
+ *
+ * Returns 1 if the range is valid.  Otherwise each block of the range is
+ * reported (subject to sflag and the block list), the error counter is
+ * bumped once, and 0 is returned.
+ */
+static int
+check_rrange(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len)
+{
+       xfs_extlen_t    off;
+
+       if (bno + len - 1 < mp->m_sb.sb_rblocks)
+               return 1;
+
+       for (off = 0; off < len; off++) {
+               if (!sflag || CHECK_BLIST(bno + off))
+                       dbprintf("rtblock %llu out of range\n",
+                               bno + off);
+       }
+       error++;
+       return 0;
+}
+
+/*
+ * Check that the len blocks starting at agno/agbno currently have type1
+ * in dbmap, then record them as type2.  c_agno/c_agbno identify the block
+ * making the claim and are used only in the out-of-range message.
+ *
+ * An invalid range is reported and left untouched.  A type1 mismatch is
+ * reported by check_dbmap() but the blocks are still set to type2.
+ */
+static void
+check_set_dbmap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       dbm_t           type1,
+       dbm_t           type2,
+       xfs_agnumber_t  c_agno,
+       xfs_agblock_t   c_agbno)
+{
+       char            *base;
+       xfs_extlen_t    off;
+       int             trace;
+
+       if (!check_range(agno, agbno, len))  {
+               dbprintf("blocks %u/%u..%u claimed by block %u/%u\n", agno,
+                       agbno, agbno + len - 1, c_agno, c_agbno);
+               return;
+       }
+       check_dbmap(agno, agbno, len, type1);
+       /* cheap pre-test so the common case skips the block list lookup */
+       trace = verbose | blist_size;
+       base = &dbmap[agno][agbno];
+       for (off = 0; off < len; off++) {
+               base[off] = (char)type2;
+               if (!trace)
+                       continue;
+               if (verbose || CHECK_BLISTA(agno, agbno + off))
+                       dbprintf("setting block %u/%u to %s\n", agno,
+                               agbno + off, typename[type2]);
+       }
+}
+
+/*
+ * Realtime counterpart of check_set_dbmap(): check that the len realtime
+ * blocks starting at bno currently have type1, then record them as type2.
+ * An invalid range is left untouched.
+ */
+static void
+check_set_rdbmap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       dbm_t           type1,
+       dbm_t           type2)
+{
+       char            *base;
+       xfs_extlen_t    off;
+       int             trace;
+
+       if (!check_rrange(bno, len))
+               return;
+       check_rdbmap(bno, len, type1);
+       /* cheap pre-test so the common case skips the block list lookup */
+       trace = verbose | blist_size;
+       base = &dbmap[mp->m_sb.sb_agcount][bno];
+       for (off = 0; off < len; off++) {
+               base[off] = (char)type2;
+               if (!trace)
+                       continue;
+               if (verbose || CHECK_BLIST(bno + off))
+                       dbprintf("setting rtblock %llu to %s\n",
+                               bno + off, typename[type2]);
+       }
+}
+
+/*
+ * Compare the realtime summary computed during the scan (sumcompute) with
+ * the one read from the filesystem (sumfile), entry by entry.  The buffers
+ * are walked as m_rsumlevels groups of sb_rbmblocks entries each.  Every
+ * differing entry bumps the error counter and is reported unless sflag is
+ * set.
+ */
+static void
+check_summary(void)
+{
+       xfs_drfsbno_t   bno;
+       xfs_suminfo_t   *computed;
+       int             log;
+       xfs_suminfo_t   *ondisk;
+
+       computed = sumcompute;
+       ondisk = sumfile;
+       for (log = 0; log < mp->m_rsumlevels; log++) {
+               for (bno = 0; bno < mp->m_sb.sb_rbmblocks; bno++) {
+                       if (*computed != *ondisk) {
+                               if (!sflag)
+                                       dbprintf("rt summary mismatch, size %d "
+                                                "block %llu, file: %d, "
+                                                "computed: %d\n",
+                                               log, bno, *ondisk, *computed);
+                               error++;
+                       }
+                       computed++;
+                       ondisk++;
+               }
+       }
+}
+
+/*
+ * Check that none of the len blocks starting at agno/agbno has a type
+ * whose bit (1 << type) is set in typemask.  Each offending block bumps
+ * the error counter and is reported unless silenced.  An invalid range is
+ * handled by check_range() and nothing else is checked.
+ */
+static void
+checknot_dbmap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       int             typemask)
+{
+       char            *base;
+       xfs_extlen_t    off;
+
+       if (!check_range(agno, agbno, len))
+               return;
+       base = &dbmap[agno][agbno];
+       for (off = 0; off < len; off++) {
+               if (!((1 << base[off]) & typemask))
+                       continue;
+               if (!sflag || CHECK_BLISTA(agno, agbno + off))
+                       dbprintf("block %u/%u type %s not expected\n",
+                               agno, agbno + off,
+                               typename[(dbm_t)base[off]]);
+               error++;
+       }
+}
+
+/*
+ * Realtime counterpart of checknot_dbmap(): check that none of the len
+ * realtime blocks starting at bno has a type whose bit (1 << type) is set
+ * in typemask.  Each offending block bumps the error counter and is
+ * reported unless silenced.
+ */
+static void
+checknot_rdbmap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       int             typemask)
+{
+       char            *base;
+       xfs_extlen_t    off;
+
+       if (!check_rrange(bno, len))
+               return;
+       base = &dbmap[mp->m_sb.sb_agcount][bno];
+       for (off = 0; off < len; off++) {
+               if (!((1 << base[off]) & typemask))
+                       continue;
+               if (!sflag || CHECK_BLIST(bno + off))
+                       dbprintf("rtblock %llu type %s not expected\n",
+                               bno + off, typename[(dbm_t)base[off]]);
+               error++;
+       }
+}
+
+/*
+ * Record a directory entry (hash value plus data address) in the dirhash
+ * table, marked as not yet seen.  The new node is pushed onto the front of
+ * the chain selected by DIR_HASH_FUNC(hash, addr).
+ *
+ * If the node cannot be allocated, the failure is reported, the error
+ * counter is bumped and the entry is not recorded.
+ */
+static void
+dir_hash_add(
+       xfs_dahash_t            hash,
+       xfs_dir2_dataptr_t      addr)
+{
+       int                     i;
+       dirhash_t               *p;
+
+       i = DIR_HASH_FUNC(hash, addr);
+       p = malloc(sizeof(*p));
+       if (p == NULL) {
+               /* do not dereference a failed allocation */
+               dbprintf("dir_hash_add: out of memory\n");
+               error++;
+               return;
+       }
+       p->next = dirhash[i];
+       dirhash[i] = p;
+       p->entry.hashval = hash;
+       p->entry.address = addr;
+       p->seen = 0;
+}
+
+/*
+ * Report every entry in the dirhash table that has not been marked seen.
+ * Each such entry bumps the error counter; the message is printed unless
+ * sflag is set and neither id->ilist nor v is.  id supplies the directory
+ * inode number used in the message.
+ */
+static void
+dir_hash_check(
+       inodata_t       *id,
+       int             v)
+{
+       int             b;
+       dirhash_t       *node;
+
+       for (b = 0; b < DIR_HASH_SIZE; b++) {
+               node = dirhash[b];
+               while (node) {
+                       if (!node->seen) {
+                               if (!sflag || id->ilist || v)
+                                       dbprintf("dir ino %lld missing leaf "
+                                                "entry for %x/%x\n",
+                                               id->ino, node->entry.hashval,
+                                               node->entry.address);
+                               error++;
+                       }
+                       node = node->next;
+               }
+       }
+}
+
+/*
+ * Free every node in the dirhash table and reset each chain head to NULL.
+ * The table array itself is kept.
+ */
+static void
+dir_hash_done(void)
+{
+       int             b;
+       dirhash_t       *node;
+       dirhash_t       *victim;
+
+       for (b = 0; b < DIR_HASH_SIZE; b++) {
+               node = dirhash[b];
+               while (node) {
+                       victim = node;
+                       node = node->next;
+                       free(victim);
+               }
+               dirhash[b] = NULL;
+       }
+}
+
+/*
+ * Allocate the zero-filled dirhash table (DIR_HASH_SIZE chain heads) on
+ * first use; later calls leave the existing table alone.
+ *
+ * The table is obtained with xcalloc(), the allocator used by the rest of
+ * this file, instead of an unchecked calloc() whose NULL result would be
+ * indexed by dir_hash_add().
+ * NOTE(review): presumably xcalloc() does not return on failure, since its
+ * other callers here dereference the result unchecked -- confirm.
+ */
+static void
+dir_hash_init(void)
+{
+       if (!dirhash)
+               dirhash = xcalloc(DIR_HASH_SIZE, sizeof(*dirhash));
+}
+
+/*
+ * Look up the entry (hash, addr) in the dirhash table and mark it seen.
+ *
+ * Returns  0 if the entry was found and had not been seen before,
+ *          1 if it was found but already marked seen,
+ *         -1 if there is no such entry.
+ */
+static int
+dir_hash_see(
+       xfs_dahash_t            hash,
+       xfs_dir2_dataptr_t      addr)
+{
+       dirhash_t               *node;
+
+       node = dirhash[DIR_HASH_FUNC(hash, addr)];
+       while (node &&
+              !(node->entry.hashval == hash && node->entry.address == addr))
+               node = node->next;
+       if (node == NULL)
+               return -1;
+       if (node->seen)
+               return 1;
+       node->seen = 1;
+       return 0;
+}
+
+/*
+ * Look up the inodata record for inode number ino in the per-AG hash
+ * table.
+ *
+ * Returns NULL if ino does not map to an existing allocation group, or if
+ * it does not survive the round trip through its AG number and AG inode
+ * number.  If no record exists, a zero-filled one is created and linked in
+ * when add is non-zero; otherwise NULL is returned.
+ */
+static inodata_t *
+find_inode(
+       xfs_ino_t       ino,
+       int             add)
+{
+       xfs_agino_t     agino;
+       xfs_agnumber_t  agno;
+       inodata_t       **bucket;
+       inodata_t       *rec;
+
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       if (agno >= mp->m_sb.sb_agcount ||
+           XFS_AGINO_TO_INO(mp, agno, agino) != ino)
+               return NULL;
+
+       bucket = &inodata[agno][agino % inodata_hash_size];
+       for (rec = *bucket; rec; rec = rec->next) {
+               if (rec->ino == ino)
+                       return rec;
+       }
+       if (!add)
+               return NULL;
+
+       /* not found: push a fresh record onto the front of the chain */
+       rec = xcalloc(1, sizeof(*rec));
+       rec->ino = ino;
+       rec->next = *bucket;
+       *bucket = rec;
+       return rec;
+}
+
+/*
+ * Free the inode hash table of allocation group agno: every record on
+ * every chain, each record's name string if it has one, and finally the
+ * table itself.
+ */
+static void
+free_inodata(
+       xfs_agnumber_t  agno)
+{
+       int             b;
+       inodata_t       *rec;
+       inodata_t       *successor;
+       inodata_t       **table;
+
+       table = inodata[agno];
+       for (b = 0; b < inodata_hash_size; b++) {
+               for (rec = table[b]; rec; rec = successor) {
+                       /* grab the link before the record is freed */
+                       successor = rec->next;
+                       if (rec->name)
+                               xfree(rec->name);
+                       xfree(rec);
+               }
+       }
+       xfree(table);
+}
+
+/*
+ * Set up for a blockget run: allocate the block maps, inode maps and inode
+ * hash tables (plus the realtime maps and summary buffers when the
+ * filesystem has realtime extents), parse the blockget options, and reset
+ * the counters used by the scan.
+ *
+ * Options: -b bno (add a block to the block list), -i ino (add an inode to
+ * the inode list), -n, -p, -s, -v (set nflag, pflag, sflag, verbose).
+ *
+ * Returns 1 on success.  Returns 0 if the superblock magic number is wrong
+ * or an unknown option is given; in both cases no block usage information
+ * is left allocated.
+ */
+static int
+init(
+       int             argc,
+       char            **argv)
+{
+       xfs_fsblock_t   bno;
+       int             c;
+       xfs_ino_t       ino;
+       int             rt;
+
+       if (mp->m_sb.sb_magicnum != XFS_SB_MAGIC) {
+               dbprintf("bad superblock magic number %x, giving up\n",
+                       mp->m_sb.sb_magicnum);
+               return 0;
+       }
+       /* one extra dbmap/inomap slot holds the realtime area, if any */
+       rt = mp->m_sb.sb_rextents != 0;
+       dbmap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*dbmap));
+       inomap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*inomap));
+       inodata = xmalloc(mp->m_sb.sb_agcount * sizeof(*inodata));
+       /* hash size scales with inodes per AG, clamped to [MIN, MAX] */
+       inodata_hash_size =
+               (int)MAX(MIN(mp->m_sb.sb_icount /
+                               (INODATA_AVG_HASH_LENGTH * mp->m_sb.sb_agcount),
+                            MAX_INODATA_HASH_SIZE),
+                        MIN_INODATA_HASH_SIZE);
+       for (c = 0; c < mp->m_sb.sb_agcount; c++) {
+               dbmap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**dbmap));
+               inomap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**inomap));
+               inodata[c] = xcalloc(inodata_hash_size, sizeof(**inodata));
+       }
+       if (rt) {
+               dbmap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**dbmap));
+               inomap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**inomap));
+               sumfile = xcalloc(mp->m_rsumsize, 1);
+               sumcompute = xcalloc(mp->m_rsumsize, 1);
+       }
+       nflag = sflag = verbose = optind = 0;
+       while ((c = getopt(argc, argv, "b:i:npsv")) != EOF) {
+               switch (c) {
+               case 'b':
+                       bno = atoll(optarg);
+                       add_blist(bno);
+                       break;
+               case 'i':
+                       ino = atoll(optarg);
+                       add_ilist(ino);
+                       break;
+               case 'n':
+                       nflag = 1;
+                       break;
+               case 'p':
+                       pflag = 1;
+                       break;
+               case 's':
+                       sflag = 1;
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               default:
+                       dbprintf("bad option for blockget command\n");
+                       /*
+                        * Release the maps allocated above.  Leaving dbmap
+                        * set would make the next blockget refuse to run
+                        * and let blockuse/blocktrash treat the empty maps
+                        * as real scan results.
+                        */
+                       blockfree_f(0, NULL);
+                       return 0;
+               }
+       }
+       error = sbver_err = serious_error = 0;
+       fdblocks = frextents = icount = ifree = 0;
+       /* start from the base version bits; more may be added later */
+       sbversion = XFS_SB_VERSION_4;
+       if (mp->m_sb.sb_inoalignmt)
+               sbversion |= XFS_SB_VERSION_ALIGNBIT;
+       if ((mp->m_sb.sb_uquotino && mp->m_sb.sb_uquotino != NULLFSINO) ||
+           (mp->m_sb.sb_pquotino && mp->m_sb.sb_pquotino != NULLFSINO))
+               sbversion |= XFS_SB_VERSION_QUOTABIT;
+       quota_init();
+       return 1;
+}
+
+/*
+ * Build the path name recorded for inode "ino" by joining the saved
+ * names of the inode and of each ancestor reached through ->parent.
+ *
+ * If ipp is non-NULL it receives the result of find_inode(ino, 0), which
+ * may be NULL.  Returns a newly allocated string the caller must xfree,
+ * or NULL when the inode is unknown or has no saved name.  The walk
+ * stops at the first ancestor without a name.
+ */
+static char *
+inode_name(
+       xfs_ino_t       ino,
+       inodata_t       **ipp)
+{
+       inodata_t       *cur;
+       char            *longer;
+       char            *name;
+
+       cur = find_inode(ino, 0);
+       if (ipp)
+               *ipp = cur;
+       if (cur == NULL || cur->name == NULL)
+               return NULL;
+
+       name = xstrdup(cur->name);
+       for (cur = cur->parent; cur && cur->name; cur = cur->parent) {
+               /* put this ancestor's name in front of what we have so far */
+               longer = prepend_path(name, cur->name);
+               xfree(name);
+               name = longer;
+       }
+       return name;
+}
+
+/*
+ * ncheck command: print inode numbers with the path names collected by
+ * a previous "blockget -n".
+ *
+ * Options:
+ *     -i ino  print only the given inode (may be repeated)
+ *     -s      when listing everything, print only inodes whose
+ *             "security" flag is set
+ *
+ * With one or more -i options only those inodes are printed and -s is
+ * not applied.  Always returns 0.
+ */
+static int
+ncheck_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agnumber_t  agno;
+       int             c;
+       inodata_t       *hp;
+       inodata_t       **ht;
+       int             i;
+       inodata_t       *id;
+       xfs_ino_t       *ilist;
+       int             ilist_size;
+       xfs_ino_t       *ilp;
+       xfs_ino_t       ino;
+       char            *p;
+       int             security;
+
+       if (!inodata || !nflag) {
+               dbprintf("must run blockget -n first\n");
+               return 0;
+       }
+       security = optind = ilist_size = 0;
+       ilist = NULL;
+       while ((c = getopt(argc, argv, "i:s")) != EOF) {
+               switch (c) {
+               case 'i':
+                       ino = atoll(optarg);
+                       ilist = xrealloc(ilist, (ilist_size + 1) *
+                               sizeof(*ilist));
+                       ilist[ilist_size++] = ino;
+                       break;
+               case 's':
+                       security = 1;
+                       break;
+               default:
+                       dbprintf("bad option -%c for ncheck command\n", c);
+                       /* don't leak the list built by earlier -i options */
+                       if (ilist)
+                               xfree(ilist);
+                       return 0;
+               }
+       }
+       if (ilist) {
+               /* explicit inode list: print just those inodes */
+               for (ilp = ilist; ilp < &ilist[ilist_size]; ilp++) {
+                       ino = *ilp;
+                       p = inode_name(ino, &hp);
+                       if (p == NULL)
+                               continue;
+                       dbprintf("%11llu %s", ino, p);
+                       if (hp->isdir)
+                               dbprintf("/.");
+                       dbprintf("\n");
+                       xfree(p);
+               }
+               xfree(ilist);
+               return 0;
+       }
+       /* no list given: walk every hash chain of every AG */
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               ht = inodata[agno];
+               for (i = 0; i < inodata_hash_size; i++) {
+                       for (hp = ht[i]; hp; hp = hp->next) {
+                               ino = XFS_AGINO_TO_INO(mp, agno, hp->ino);
+                               p = inode_name(ino, &id);
+                               if (!p || !id)
+                                       continue;
+                               if (!security || id->security) {
+                                       dbprintf("%11llu %s", ino, p);
+                                       if (hp->isdir)
+                                               dbprintf("/.");
+                                       dbprintf("\n");
+                               }
+                               xfree(p);
+                       }
+               }
+       }
+       return 0;
+}
+
+/*
+ * Return a newly allocated string "parent/oldpath".  Neither argument
+ * is modified or freed; the caller owns the result.
+ */
+static char *
+prepend_path(
+       char    *oldpath,
+       char    *parent)
+{
+       char    *joined;
+       int     size;
+
+       /* both strings, the '/' separator and the trailing NUL */
+       size = (int)(strlen(oldpath) + strlen(parent) + 2);
+       joined = xmalloc(size);
+       strcpy(joined, parent);
+       strcat(joined, "/");
+       strcat(joined, oldpath);
+       return joined;
+}
+
+/*
+ * Check a version 2 directory whose entries live in directory block 0.
+ *
+ * Maps directory block 0 through blkmap, reads it as the current I/O
+ * cursor and hands it to process_data_dir_v2(), then checks the
+ * directory hash table built from it.  *dot and *dotdot are updated by
+ * process_data_dir_v2().
+ *
+ * Returns the value from process_data_dir_v2(), or 0 when block 0 is
+ * not mapped or cannot be read (error is incremented in both cases).
+ */
+static xfs_ino_t
+process_block_dir_v2(
+       blkmap_t        *blkmap,
+       int             *dot,
+       int             *dotdot,
+       inodata_t       *id)
+{
+       xfs_fsblock_t   b;
+       bbmap_t         bbmap;
+       bmap_ext_t      *bmp;
+       int             nex;
+       xfs_ino_t       parent;
+       int             v;
+       int             x;
+
+       nex = blkmap_getn(blkmap, 0, mp->m_dirblkfsbs, &bmp);
+       v = id->ilist || verbose;
+       if (nex == 0) {
+               if (!sflag || v)
+                       dbprintf("block 0 for directory inode %lld is "
+                                "missing\n",
+                               id->ino);
+               error++;
+               return 0;
+       }
+       push_cur();
+       /* a directory block spread over several extents needs a bbmap */
+       if (nex > 1)
+               make_bbmap(&bbmap, nex, bmp);
+       set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock),
+               mp->m_dirblkfsbs * blkbb, DB_RING_IGN, nex > 1 ? &bbmap : NULL);
+       /* become verbose if any block of the directory block is on the blist */
+       for (x = 0; !v && x < nex; x++) {
+               for (b = bmp[x].startblock;
+                    !v && b < bmp[x].startblock + bmp[x].blockcount;
+                    b++)
+                       v = CHECK_BLIST(b);
+       }
+       free(bmp);
+       if (iocur_top->data == NULL) {
+               if (!sflag || id->ilist || v)
+                       dbprintf("can't read block 0 for directory inode "
+                                "%lld\n",
+                               id->ino);
+               error++;
+               /* balance the push_cur() above before bailing out */
+               pop_cur();
+               return 0;
+       }
+       dir_hash_init();
+       parent = process_data_dir_v2(dot, dotdot, id, v, mp->m_dirdatablk,
+               NULL);
+       dir_hash_check(id, v);
+       dir_hash_done();
+       pop_cur();
+       return parent;
+}
+
+/*
+ * Process an array of "numrecs" extent records belonging to inode "id".
+ *
+ * Each record is decoded with convert_extent() into file offset (o),
+ * start block (s), block count (c) and flag (f).  Unless sflag is set,
+ * out-of-order records are reported and records with an out-of-range
+ * start or end block are reported and skipped.  Accepted extents are
+ * added to *blkmapp (when one is supplied), recorded in the block and
+ * inode maps with the given "type", checked against the block list, and
+ * their lengths are added to *tot.
+ *
+ * NOTE(review): when sflag is set the range checks are bypassed, so
+ * out-of-range extents reach set_dbmap()/set_rdbmap() - confirm those
+ * helpers validate their arguments.  Nothing in this function
+ * increments the global error count.
+ */
+static void
+process_bmbt_reclist(
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       dbm_t                   type,
+       inodata_t               *id,
+       xfs_drfsbno_t           *tot,
+       blkmap_t                **blkmapp)
+{
+       xfs_agblock_t           agbno;
+       xfs_agnumber_t          agno;
+       xfs_fsblock_t           b;
+       xfs_dfilblks_t          c;
+       xfs_dfilblks_t          cp;
+       int                     f;
+       int                     i;
+       xfs_agblock_t           iagbno;
+       xfs_agnumber_t          iagno;
+       xfs_dfiloff_t           o;
+       xfs_dfiloff_t           op;
+       xfs_dfsbno_t            s;
+       int                     v;
+
+       /* op/cp remember the previous record's offset and length */
+       cp = op = 0;
+       v = verbose || id->ilist;
+       iagno = XFS_INO_TO_AGNO(mp, id->ino);
+       iagbno = XFS_INO_TO_AGBNO(mp, id->ino);
+       for (i = 0; i < numrecs; i++, rp++) {
+               convert_extent((xfs_bmbt_rec_64_t *)rp, &o, &s, &c, &f);
+               if (v)
+                       dbprintf("inode %lld extent [%lld,%lld,%lld,%d]\n",
+                               id->ino, o, s, c, f);
+               /* this record must start at or after the end of the previous one */
+               if (!sflag && i > 0 && op + cp > o)
+                       dbprintf("bmap rec out of order, inode %lld entry %d\n",
+                               id->ino, i);
+               op = o;
+               cp = c;
+               if (type == DBM_RTDATA) {
+                       /* realtime extents are bounded by sb_rblocks */
+                       if (!sflag && s >= mp->m_sb.sb_rblocks) {
+                               dbprintf("inode %lld bad rt block number %lld, "
+                                        "offset %lld\n",
+                                       id->ino, s, o);
+                               continue;
+                       }
+               } else if (!sflag) {
+                       agno = XFS_FSB_TO_AGNO(mp, s);
+                       agbno = XFS_FSB_TO_AGBNO(mp, s);
+                       /* first block of the extent must lie inside a valid AG */
+                       if (agno >= mp->m_sb.sb_agcount ||
+                           agbno >= mp->m_sb.sb_agblocks) {
+                               dbprintf("inode %lld bad block number %lld "
+                                        "[%d,%d], offset %lld\n",
+                                       id->ino, s, agno, agbno, o);
+                               continue;
+                       }
+                       /* ... and so must its last block */
+                       if (agbno + c - 1 >= mp->m_sb.sb_agblocks) {
+                               dbprintf("inode %lld bad block number %lld "
+                                        "[%d,%d], offset %lld\n",
+                                       id->ino, s + c - 1, agno,
+                                       agbno + (xfs_agblock_t)c - 1, o);
+                               continue;
+                       }
+               }
+               if (blkmapp && *blkmapp)
+                       blkmap_set_ext(blkmapp, (xfs_fileoff_t)o,
+                               (xfs_fsblock_t)s, (xfs_extlen_t)c);
+               if (type == DBM_RTDATA) {
+                       set_rdbmap((xfs_fsblock_t)s, (xfs_extlen_t)c,
+                               DBM_RTDATA);
+                       set_rinomap((xfs_fsblock_t)s, (xfs_extlen_t)c, id);
+                       /* only walk the extent when a block list was given */
+                       for (b = (xfs_fsblock_t)s;
+                            blist_size && b < s + c;
+                            b++, o++) {
+                               if (CHECK_BLIST(b))
+                                       dbprintf("inode %lld block %lld at "
+                                                "offset %lld\n",
+                                               id->ino, (xfs_dfsbno_t)b, o);
+                       }
+               } else {
+                       agno = XFS_FSB_TO_AGNO(mp, (xfs_fsblock_t)s);
+                       agbno = XFS_FSB_TO_AGBNO(mp, (xfs_fsblock_t)s);
+                       set_dbmap(agno, agbno, (xfs_extlen_t)c, type, iagno,
+                               iagbno);
+                       set_inomap(agno, agbno, (xfs_extlen_t)c, id);
+                       /* agbno is advanced below but not read again in this loop */
+                       for (b = (xfs_fsblock_t)s;
+                            blist_size && b < s + c;
+                            b++, o++, agbno++) {
+                               if (CHECK_BLIST(b))
+                                       dbprintf("inode %lld block %lld at "
+                                                "offset %lld\n",
+                                               id->ino, (xfs_dfsbno_t)b, o);
+                       }
+               }
+               *tot += c;
+       }
+}
+
+/*
+ * Process a btree-format fork (data or attr, selected by whichfork) of
+ * the on-disk inode "dip".
+ *
+ * The bmap btree root stored in the inode fork is validated (level and
+ * record count), then either its records are processed directly (level
+ * 0) or each child pointer is followed with scan_lbtree().  Block
+ * totals are accumulated through totd/toti and the extent count through
+ * *nex; extents are added to *blkmapp by the callees.
+ *
+ * Problems are reported unless sflag is set (always for inodes on the
+ * inode list) and increment the global error count.
+ */
+static void
+process_btinode(
+       inodata_t               *id,
+       xfs_dinode_t            *dip,
+       dbm_t                   type,
+       xfs_drfsbno_t           *totd,
+       xfs_drfsbno_t           *toti,
+       xfs_extnum_t            *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork)
+{
+       xfs_bmdr_block_t        *dib;
+       int                     i;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmbt_rec_32_t       *rp;
+
+       /* the btree root lives in the inode fork itself */
+       dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT);
+       if (INT_GET(dib->bb_level, ARCH_CONVERT) >= XFS_BM_MAXLEVELS(mp, whichfork)) {
+               if (!sflag || id->ilist)
+                       dbprintf("level for ino %lld %s fork bmap root too "
+                                "large (%u)\n",
+                               id->ino,
+                               whichfork == XFS_DATA_FORK ? "data" : "attr",
+                               INT_GET(dib->bb_level, ARCH_CONVERT));
+               error++;
+               return;
+       }
+       /* the root cannot hold more records than fit in the fork area */
+       if (INT_GET(dib->bb_numrecs, ARCH_CONVERT) >
+           XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+                   xfs_bmdr, INT_GET(dib->bb_level, ARCH_CONVERT) == 0)) {
+               if (!sflag || id->ilist)
+                       dbprintf("numrecs for ino %lld %s fork bmap root too "
+                                "large (%u)\n",
+                               id->ino, 
+                               whichfork == XFS_DATA_FORK ? "data" : "attr",
+                               INT_GET(dib->bb_numrecs, ARCH_CONVERT));
+               error++;
+               return;
+       }
+       if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {
+               /* level 0 root: the extent records are stored in the root */
+               rp = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+                       XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+                       xfs_bmdr, dib, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+                                       whichfork),
+                               xfs_bmdr, 1));
+               process_bmbt_reclist(rp, INT_GET(dib->bb_numrecs, ARCH_CONVERT), type, id, totd,
+                       blkmapp);
+               *nex += INT_GET(dib->bb_numrecs, ARCH_CONVERT);
+               return;
+       } else {
+               /* interior root: follow every child pointer */
+               pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+                       xfs_bmdr, dib, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+                                                              whichfork),
+                                               xfs_bmdr, 0));
+               for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)
+                       scan_lbtree((xfs_fsblock_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT),
+                               scanfunc_bmap, type, id, totd, toti, nex,
+                               blkmapp, 1,
+                               whichfork == XFS_DATA_FORK ?
+                                       TYP_BMAPBTD : TYP_BMAPBTA);
+       }
+       /*
+        * Reached only for an interior root: an extent count that would fit
+        * in the fork area as plain records is reported as too low for the
+        * btree format.
+        */
+       if (*nex <=
+           XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_t)) {
+               if (!sflag || id->ilist)
+                       dbprintf("extent count for ino %lld %s fork too low "
+                                "(%d) for file format\n",
+                               id->ino,
+                               whichfork == XFS_DATA_FORK ? "data" : "attr",
+                               *nex);
+               error++;
+       }
+}
+
+/*
+ * Check the version 2 directory block held in the current I/O cursor
+ * (iocur_top->data); both the "block" and the "data" magic numbers are
+ * accepted.
+ *
+ *     dot, dotdot     incremented for every "." / ".." entry seen
+ *     id              inode data of the directory being checked
+ *     v               non-zero to report problems even when sflag is set
+ *     dabno           directory block number, used in messages and to
+ *                     compute entry addresses
+ *     freetabp        if non-NULL, table that receives the length of this
+ *                     block's first bestfree slot; grown here as needed
+ *
+ * Walks every used and unused entry, validates the bestfree table, the
+ * entry tags and (for the block magic) the tail and leaf entries, and
+ * feeds each name hash to dir_hash_add().  Every problem found
+ * increments the global error count.
+ *
+ * Returns the inode number of the ".." entry (NULLFSINO if that inode
+ * could not be looked up), 0 if no ".." entry was seen, or NULLFSINO
+ * when the block magic number is bad.
+ */
+static xfs_ino_t
+process_data_dir_v2(
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id,
+       int                     v,
+       xfs_dablk_t             dabno,
+       freetab_t               **freetabp)
+{
+       xfs_dir2_dataptr_t      addr;
+       xfs_dir2_data_free_t    *bf;
+       int                     bf_err;
+       xfs_dir2_block_t        *block;
+       xfs_dir2_block_tail_t   *btp = NULL;
+       inodata_t               *cid;
+       int                     count;
+       xfs_dir2_data_t         *data;
+       xfs_dir2_db_t           db;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_free_t    *dfp;
+       xfs_dir2_data_unused_t  *dup;
+       char                    *endptr;
+       int                     freeseen;
+       freetab_t               *freetab;
+       xfs_dahash_t            hash;
+       int                     i;
+       int                     lastfree;
+       int                     lastfree_err;
+       xfs_dir2_leaf_entry_t   *lep = NULL;
+       xfs_ino_t               lino;
+       xfs_ino_t               parent = 0;
+       char                    *ptr;
+       int                     stale = 0;
+       int                     tag_err;
+       xfs_dir2_data_off_t     *tagp;
+
+       /* "data" and "block" are two views of the same buffer */
+       data = iocur_top->data;
+       block = iocur_top->data;
+       if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC &&
+           INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) {
+               if (!sflag || v)
+                       dbprintf("bad directory data magic # %#x for dir ino "
+                                "%lld block %d\n",
+                               INT_GET(data->hdr.magic, ARCH_CONVERT), id->ino, dabno);
+               error++;
+               return NULLFSINO;
+       }
+       db = XFS_DIR2_DA_TO_DB(mp, dabno);
+       bf = data->hdr.bestfree;
+       ptr = (char *)data->u;
+       if (INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               /* block magic: entries end where the leaf array begins */
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+               lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+               endptr = (char *)lep;
+               if (endptr <= ptr || endptr > (char *)btp) {
+                       /* implausible tail: scan the whole block, ignore leaves */
+                       endptr = (char *)data + mp->m_dirblksize;
+                       lep = NULL;
+                       if (!sflag || v)
+                               dbprintf("bad block directory tail for dir ino "
+                                        "%lld\n",
+                                       id->ino);
+                       error++;
+               }
+       } else
+               endptr = (char *)data + mp->m_dirblksize;
+       bf_err = lastfree_err = tag_err = 0;
+       count = lastfree = freeseen = 0;
+       /*
+        * A bestfree slot with zero length must also have a zero offset;
+        * such a slot is marked as seen in the freeseen bit mask.
+        */
+       if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+               bf_err += INT_GET(bf[0].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 0;
+       }
+       if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+               bf_err += INT_GET(bf[1].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 1;
+       }
+       if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+               bf_err += INT_GET(bf[2].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 2;
+       }
+       /* bestfree lengths must not increase from slot 0 to slot 2 */
+       bf_err += INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT);
+       bf_err += INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT);
+       if (freetabp) {
+               freetab = *freetabp;
+               if (freetab->naents <= db) {
+                       /*
+                        * NOTE(review): the realloc() result is used without
+                        * a NULL check.
+                        */
+                       *freetabp = freetab =
+                               realloc(freetab, FREETAB_SIZE(db + 1));
+                       for (i = freetab->naents; i < db; i++)
+                               freetab->ents[i] = NULLDATAOFF;
+                       freetab->naents = db + 1;
+               }
+               if (freetab->nents < db + 1)
+                       freetab->nents = db + 1;
+               freetab->ents[db] = INT_GET(bf[0].length, ARCH_CONVERT);
+       }
+       /* walk every used or unused entry between the header and endptr */
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       /* unused entry directly after another unused entry */
+                       lastfree_err += lastfree != 0;
+                       if ((INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)) ||
+                           INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+                           (char *)(tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT)) >=
+                           endptr) {
+                               if (!sflag || v)
+                                       dbprintf("dir %lld block %d bad free "
+                                                "entry at %d\n",
+                                               id->ino, dabno,
+                                               (int)((char *)dup -
+                                                     (char *)data));
+                               error++;
+                               break;
+                       }
+                       /* the tag must hold the entry's own offset in the block */
+                       tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dup - (char *)data;
+                       dfp = process_data_dir_v2_freefind(data, dup);
+                       if (dfp) {
+                               /* each bestfree slot may be matched only once */
+                               i = (int)(dfp - bf);
+                               bf_err += (freeseen & (1 << i)) != 0;
+                               freeseen |= 1 << i;
+                       } else
+                               bf_err += INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT);
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       lastfree = 1;
+                       continue;
+               }
+               /* not tagged as free: treat it as a directory entry */
+               dep = (xfs_dir2_data_entry_t *)dup;
+               if (dep->namelen == 0) {
+                       if (!sflag || v)
+                               dbprintf("dir %lld block %d zero length entry "
+                                        "at %d\n",
+                                       id->ino, dabno,
+                                       (int)((char *)dep - (char *)data));
+                       error++;
+               }
+               tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+               if ((char *)tagp >= endptr) {
+                       if (!sflag || v)
+                               dbprintf("dir %lld block %d bad entry at %d\n",
+                                       id->ino, dabno,
+                                       (int)((char *)dep - (char *)data));
+                       error++;
+                       break;
+               }
+               tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dep - (char *)data;
+               /* remember (hash, address) for the later leaf cross-check */
+               addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db,
+                       (char *)dep - (char *)data);
+               hash = libxfs_da_hashname((char *)dep->name, dep->namelen);
+               dir_hash_add(hash, addr);
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               count++;
+               lastfree = 0;
+               lino = INT_GET(dep->inumber, ARCH_CONVERT);
+               cid = find_inode(lino, 1);
+               if (v)
+                       dbprintf("dir %lld block %d entry %*.*s %lld\n",
+                               id->ino, dabno, dep->namelen, dep->namelen,
+                               dep->name, lino);
+               if (cid)
+                       addlink_inode(cid);
+               else {
+                       if (!sflag || v)
+                               dbprintf("dir %lld block %d entry %*.*s bad "
+                                        "inode number %lld\n",
+                                       id->ino, dabno, dep->namelen,
+                                       dep->namelen, dep->name, lino);
+                       error++;
+               }
+               if (dep->namelen == 2 && dep->name[0] == '.' &&
+                   dep->name[1] == '.') {
+                       /* ".." entry: remember the first one as the parent */
+                       if (parent) {
+                               if (!sflag || v)
+                                       dbprintf("multiple .. entries in dir "
+                                                "%lld (%lld, %lld)\n",
+                                               id->ino, parent, lino);
+                               error++;
+                       } else
+                               parent = cid ? lino : NULLFSINO;
+                       (*dotdot)++;
+               } else if (dep->namelen != 1 || dep->name[0] != '.') {
+                       /* ordinary entry: record this directory as parent and the name */
+                       if (cid != NULL) {
+                               if (!cid->parent)
+                                       cid->parent = id;
+                               addname_inode(cid, (char *)dep->name,
+                                       dep->namelen);
+                       }
+               } else {
+                       /* "." entry: must refer to the directory itself */
+                       if (lino != id->ino) {
+                               if (!sflag || v)
+                                       dbprintf("dir %lld entry . inode "
+                                                "number mismatch (%lld)\n",
+                                               id->ino, lino);
+                               error++;
+                       }
+                       (*dot)++;
+               }
+       }
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               /* cross-check the leaf entries against the hashes added above */
+               endptr = (char *)data + mp->m_dirblksize;
+               for (i = stale = 0; lep && i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+                       if ((char *)&lep[i] >= endptr) {
+                               if (!sflag || v)
+                                       dbprintf("dir %lld block %d bad count "
+                                                "%u\n",
+                                               id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT));
+                               error++;
+                               break;
+                       }
+                       if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                               stale++;
+                       else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) {
+                               if (!sflag || v)
+                                       dbprintf("dir %lld block %d extra leaf "
+                                                "entry %x %x\n",
+                                               id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT),
+                                               INT_GET(lep[i].address, ARCH_CONVERT));
+                               error++;
+                       }
+               }
+       }
+       /* all three bestfree slots (bits 0..2) must have been accounted for */
+       bf_err += freeseen != 7;
+       if (bf_err) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d bad bestfree data\n",
+                               id->ino, dabno);
+               error++;
+       }
+       /* tail count minus stale must equal the number of entries walked */
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC &&
+           count != INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT)) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d bad block tail count %d "
+                                "(stale %d)\n",
+                               id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT));
+               error++;
+       }
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC && stale != INT_GET(btp->stale, ARCH_CONVERT)) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d bad stale tail count %d\n",
+                               id->ino, dabno, INT_GET(btp->stale, ARCH_CONVERT));
+               error++;
+       }
+       if (lastfree_err) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d consecutive free entries\n",
+                               id->ino, dabno);
+               error++;
+       }
+       if (tag_err) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d entry/unused tag "
+                                "mismatch\n",
+                               id->ino, dabno);
+               error++;
+       }
+       return parent;
+}
+
+/*
+ * Find the bestfree slot in the header of "data" that refers to the
+ * unused entry "dup", matching on the entry's byte offset in the block.
+ *
+ * Returns the matching slot, or NULL when the entry is shorter than the
+ * last bestfree slot, when a slot with a zero offset is reached first,
+ * or when no slot matches.
+ */
+static xfs_dir2_data_free_t *
+process_data_dir_v2_freefind(
+       xfs_dir2_data_t         *data,
+       xfs_dir2_data_unused_t  *dup)
+{
+       int                     idx;
+       xfs_dir2_data_free_t    *slot;
+       xfs_dir2_data_aoff_t    where;
+
+       where = (xfs_dir2_data_aoff_t)((char *)dup - (char *)data);
+       /* an entry shorter than the last slot is not looked up at all */
+       if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(data->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT))
+               return NULL;
+       for (idx = 0; idx < XFS_DIR2_DATA_FD_COUNT; idx++) {
+               slot = &data->hdr.bestfree[idx];
+               /* a zero offset ends the search */
+               if (INT_GET(slot->offset, ARCH_CONVERT) == 0)
+                       break;
+               if (INT_GET(slot->offset, ARCH_CONVERT) == where)
+                       return slot;
+       }
+       return NULL;
+}
+
+/*
+ * Check the directory inode "dip" and its "." and ".." entries.
+ *
+ * The entries are scanned by process_dir_v2() or process_dir_v1(),
+ * chosen by XFS_DIR_IS_V2(mp); if that reports failure nothing more is
+ * done.  Otherwise a missing "." or "..", a non-root directory whose
+ * ".." is itself, or a root directory whose ".." is another inode are
+ * reported and counted in the global error count.  When none of the
+ * ".." problems apply and the parent is a valid, different inode it is
+ * recorded with addparent_inode().
+ */
+static void
+process_dir(
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       inodata_t       *id)
+{
+       xfs_ino_t       dotdot_ino;
+       int             failed;
+       xfs_fsblock_t   inofsb;
+       int             ndot = 0;
+       int             ndotdot = 0;
+
+       if (XFS_DIR_IS_V2(mp))
+               failed = process_dir_v2(dip, blkmap, &ndot, &ndotdot, id,
+                       &dotdot_ino);
+       else
+               failed = process_dir_v1(dip, blkmap, &ndot, &ndotdot, id,
+                       &dotdot_ino);
+       if (failed)
+               return;
+
+       /* block holding the inode, used for the block-list check below */
+       inofsb = XFS_INO_TO_FSB(mp, id->ino);
+       if (ndot == 0) {
+               if (!sflag || id->ilist || CHECK_BLIST(inofsb))
+                       dbprintf("no . entry for directory %lld\n", id->ino);
+               error++;
+       }
+
+       if (ndotdot == 0) {
+               if (!sflag || id->ilist || CHECK_BLIST(inofsb))
+                       dbprintf("no .. entry for directory %lld\n", id->ino);
+               error++;
+               return;
+       }
+       if (dotdot_ino == id->ino && id->ino != mp->m_sb.sb_rootino) {
+               if (!sflag || id->ilist || CHECK_BLIST(inofsb))
+                       dbprintf(". and .. same for non-root directory %lld\n",
+                               id->ino);
+               error++;
+               return;
+       }
+       if (id->ino == mp->m_sb.sb_rootino && id->ino != dotdot_ino) {
+               if (!sflag || id->ilist || CHECK_BLIST(inofsb))
+                       dbprintf("root directory %lld has .. %lld\n", id->ino,
+                               dotdot_ino);
+               error++;
+               return;
+       }
+       if (dotdot_ino != NULLFSINO && id->ino != dotdot_ino)
+               addparent_inode(id, dotdot_ino);
+}
+
+/*
+ * Dispatch a version 1 directory to the shortform, leaf or node checker
+ * according to the inode's size and format, storing the checker's
+ * result in *parent.
+ *
+ * Returns 0 when a checker was run, 1 (after reporting and counting an
+ * error) when the size/format combination matches none of them.
+ */
+static int
+process_dir_v1(
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       int             *dot,
+       int             *dotdot,
+       inodata_t       *id,
+       xfs_ino_t       *parent)
+{
+       int             mapped;
+
+       /* inline directory that fits in the data fork */
+       if (dip->di_core.di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT) &&
+           dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) {
+               *parent = process_shortform_dir_v1(dip, dot, dotdot, id);
+               return 0;
+       }
+
+       /* the remaining layouts need extents or btree format */
+       mapped = dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS ||
+                dip->di_core.di_format == XFS_DINODE_FMT_BTREE;
+       if (mapped && dip->di_core.di_size == XFS_LBSIZE(mp)) {
+               *parent = process_leaf_dir_v1(blkmap, dot, dotdot, id);
+               return 0;
+       }
+       if (mapped && dip->di_core.di_size >= XFS_LBSIZE(mp)) {
+               *parent = process_node_dir_v1(blkmap, dot, dotdot, id);
+               return 0;
+       }
+
+       dbprintf("bad size (%lld) or format (%d) for directory inode "
+                "%lld\n",
+               dip->di_core.di_size, (int)dip->di_core.di_format,
+               id->ino);
+       error++;
+       return 1;
+}
+
+/*
+ * Dispatch a version 2 directory to the shortform, single-block or
+ * leaf/node checker, storing the checker's result in *parent.
+ *
+ * The choice depends on the inode size and format and on the last
+ * mapped file offset from blkmap (taken as 0 when blkmap is NULL).
+ * Returns 0 when a checker was run, 1 (after reporting and counting an
+ * error) when no layout matches.
+ */
+static int
+process_dir_v2(
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       int             *dot,
+       int             *dotdot,
+       inodata_t       *id,
+       xfs_ino_t       *parent)
+{
+       xfs_fileoff_t   last = 0;
+       int             mapped;
+
+       if (blkmap)
+               last = blkmap_last_off(blkmap);
+
+       /* inline directory that fits in the data fork */
+       if (dip->di_core.di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT) &&
+           dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) {
+               *parent = process_sf_dir_v2(dip, dot, dotdot, id);
+               return 0;
+       }
+
+       /* the remaining layouts need extents or btree format */
+       mapped = dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS ||
+                dip->di_core.di_format == XFS_DINODE_FMT_BTREE;
+       if (mapped && last == mp->m_dirblkfsbs) {
+               /* the mapping ends after exactly one directory block */
+               *parent = process_block_dir_v2(blkmap, dot, dotdot, id);
+               return 0;
+       }
+       if (mapped && last >= mp->m_dirleafblk + mp->m_dirblkfsbs) {
+               *parent = process_leaf_node_dir_v2(blkmap, dot, dotdot, id,
+                       dip->di_core.di_size);
+               return 0;
+       }
+
+       dbprintf("bad size (%lld) or format (%d) for directory inode "
+                "%lld\n",
+               dip->di_core.di_size, (int)dip->di_core.di_format,
+               id->ino);
+       error++;
+       return 1;
+}
+
+/*
+ * Check one fork of an inode whose fork is stored as an in-inode extent
+ * list.  The fork's extent count is stored in *nex; if it is non-negative
+ * and no larger than the number of 32-bit-layout extent records that fit
+ * in the fork, the records are handed to process_bmbt_reclist().
+ * Otherwise the global error count is bumped and nothing else is done.
+ * toti is not used here.
+ */
+/* ARGSUSED */
+static void
+process_exinode(
+       inodata_t               *id,
+       xfs_dinode_t            *dip,
+       dbm_t                   type,
+       xfs_drfsbno_t           *totd,
+       xfs_drfsbno_t           *toti,
+       xfs_extnum_t            *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork)
+{
+       xfs_bmbt_rec_32_t       *recs;
+
+       *nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_NOCONVERT);
+       if (*nex >= 0 &&
+           *nex <=
+           XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_32_t)) {
+               recs = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork,
+                                                              ARCH_NOCONVERT);
+               process_bmbt_reclist(recs, *nex, type, id, totd, blkmapp);
+               return;
+       }
+
+       /* extent count cannot be right for a fork of this size */
+       if (!sflag || id->ilist)
+               dbprintf("bad number of extents %d for inode %lld\n",
+                       *nex, id->ino);
+       error++;
+}
+
+/*
+ * Check a single on-disk inode.
+ *
+ * The inode core is converted with libxfs_xlate_dinode_core() and copied
+ * back over dip->di_core, so the rest of the function reads the core
+ * fields directly.  With isfree set, only the magic number, version, block
+ * count, link count and mode are checked.  Otherwise the data fork format
+ * is validated against the file type, both forks are walked to count
+ * blocks and extents, quota usage is accumulated when qpdo or qudo is set,
+ * the counts are compared with the values recorded in the core, and
+ * directories, the realtime bitmap/summary inodes and quota inodes get
+ * their type-specific checks.
+ *
+ * agf    - only agf_seqno is read, to build the full inode number
+ * agino  - inode number within that allocation group
+ * dip    - the on-disk inode; its core is overwritten in place
+ * isfree - nonzero selects the free-inode checks
+ *
+ * Every problem found bumps the global error count.
+ */
+static void
+process_inode(
+       xfs_agf_t               *agf,
+       xfs_agino_t             agino,
+       xfs_dinode_t            *dip,
+       int                     isfree)
+{
+       blkmap_t                *blkmap;
+       xfs_fsblock_t           bno = 0;
+       xfs_dinode_core_t       tdic;
+       xfs_dinode_core_t       *dic;
+       inodata_t               *id = NULL;
+       xfs_ino_t               ino;
+       xfs_extnum_t            nextents = 0;
+       int                     nlink;
+       int                     security;
+       xfs_drfsbno_t           totblocks;
+       xfs_drfsbno_t           totdblocks = 0;
+       xfs_drfsbno_t           totiblocks = 0;
+       dbm_t                   type;
+       xfs_extnum_t            anextents = 0;
+       xfs_drfsbno_t           atotdblocks = 0;
+       xfs_drfsbno_t           atotiblocks = 0;
+       xfs_qcnt_t              bc = 0;
+       xfs_qcnt_t              ic = 0;
+       xfs_qcnt_t              rc = 0;
+       /*
+        * Indexed by (di_mode & IFMT) >> 12; each entry is a bit mask of
+        * the data fork formats accepted for that file type.
+        */
+       static char             okfmts[] = {
+               0,                              /* type 0 unused */
+               1 << XFS_DINODE_FMT_DEV,        /* FIFO */
+               1 << XFS_DINODE_FMT_DEV,        /* CHR */
+               0,                              /* type 3 unused */
+               (1 << XFS_DINODE_FMT_LOCAL) |
+               (1 << XFS_DINODE_FMT_EXTENTS) |
+               (1 << XFS_DINODE_FMT_BTREE),    /* DIR */
+               0,                              /* type 5 unused */
+               1 << XFS_DINODE_FMT_DEV,        /* BLK */
+               0,                              /* type 7 unused */
+               (1 << XFS_DINODE_FMT_EXTENTS) |
+               (1 << XFS_DINODE_FMT_BTREE),    /* REG */
+               0,                              /* type 9 unused */
+               (1 << XFS_DINODE_FMT_LOCAL) |
+               (1 << XFS_DINODE_FMT_EXTENTS),  /* LNK */
+               0,                              /* type 11 unused */
+               1 << XFS_DINODE_FMT_DEV,        /* SOCK */
+               0,                              /* type 13 unused */
+               1 << XFS_DINODE_FMT_UUID,       /* MNT */
+               0                               /* type 15 unused */
+       };
+       /* printable names, indexed by fork format value */
+       static char             *fmtnames[] = {
+               "dev", "local", "extents", "btree", "uuid"
+       };
+
+       /* convert the core, then copy it back into the inode */
+       libxfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, &tdic, 1,
+                                ARCH_CONVERT);
+       memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t));
+       dic=&dip->di_core;
+
+       ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino);
+       if (!isfree) {
+               id = find_inode(ino, 1);
+               bno = XFS_INO_TO_FSB(mp, ino);
+               blkmap = NULL;
+       }
+       if (dic->di_magic != XFS_DINODE_MAGIC) {
+               if (!sflag || isfree || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad magic number %#x for inode %lld\n",
+                               dic->di_magic, ino);
+               error++;
+               return;
+       }
+       if (!XFS_DINODE_GOOD_VERSION(dic->di_version)) {
+               if (!sflag || isfree || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad version number %#x for inode %lld\n",
+                               dic->di_version, ino);
+               error++;
+               return;
+       }
+       if (isfree) {
+               /*
+                * id is never looked up for a free inode, so it is still
+                * NULL here and must not be dereferenced.
+                */
+               if (dic->di_nblocks != 0) {
+                       if (!sflag || (id && id->ilist) || CHECK_BLIST(bno))
+                               dbprintf("bad nblocks %lld for free inode "
+                                        "%lld\n",
+                                       dic->di_nblocks, ino);
+                       error++;
+               }
+               if (dic->di_version == XFS_DINODE_VERSION_1)
+                       nlink = dic->di_onlink;
+               else
+                       nlink = dic->di_nlink;
+               if (nlink != 0) {
+                       if (!sflag || (id && id->ilist) || CHECK_BLIST(bno))
+                               dbprintf("bad nlink %d for free inode %lld\n",
+                                       nlink, ino);
+                       error++;
+               }
+               if (dic->di_mode != 0) {
+                       if (!sflag || (id && id->ilist) || CHECK_BLIST(bno))
+                               dbprintf("bad mode %#o for free inode %lld\n",
+                                       dic->di_mode, ino);
+                       error++;
+               }
+               return;
+       }
+       /*
+        * di_mode is a 16-bit uint so no need to check the < 0 case.
+        * di_format comes straight from disk, so range-check it before it
+        * is used as a shift count; this also keeps the fmtnames[] lookup
+        * below in bounds.
+        */
+       if ((((dic->di_mode & IFMT) >> 12) > 15) ||
+           (unsigned int)dic->di_format > XFS_DINODE_FMT_UUID ||
+           (!(okfmts[(dic->di_mode & IFMT) >> 12] & (1 << dic->di_format)))) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad format %d for inode %lld type %#o\n",
+                               dic->di_format, id->ino, dic->di_mode & IFMT);
+               error++;
+               return;
+       }
+       if ((unsigned int)XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_NOCONVERT) >= XFS_LITINO(mp))  {
+               if (!sflag || id->ilist)
+                       dbprintf("bad fork offset %d for inode %lld\n",
+                               dic->di_forkoff, id->ino);
+               error++;
+               return;
+       }
+       if ((unsigned int)dic->di_aformat > XFS_DINODE_FMT_BTREE)  {
+               if (!sflag || id->ilist)
+                       dbprintf("bad attribute format %d for inode %lld\n",
+                               dic->di_aformat, id->ino);
+               error++;
+               return;
+       }
+       if (verbose || id->ilist || CHECK_BLIST(bno))
+               dbprintf("inode %lld mode %#o fmt %s "
+                        "afmt %s "
+                        "nex %d anex %d nblk %lld sz %lld%s%s\n",
+                       id->ino, dic->di_mode, fmtnames[dic->di_format],
+                       fmtnames[dic->di_aformat],
+                       dic->di_nextents,
+                       dic->di_anextents,
+                       dic->di_nblocks, dic->di_size,
+                       dic->di_flags & XFS_DIFLAG_REALTIME ? " rt" : "",
+                       dic->di_flags & XFS_DIFLAG_PREALLOC ? " pre" : ""
+                               );
+       /*
+        * Classify the inode.  A block map is only allocated for the
+        * types that get a type-specific check at the end of the function.
+        */
+       security = 0;
+       switch (dic->di_mode & IFMT) {
+       case IFDIR:
+               type = DBM_DIR;
+               if (dic->di_format == XFS_DINODE_FMT_LOCAL)
+                       break;
+               blkmap = blkmap_alloc(dic->di_nextents);
+               break;
+       case IFREG:
+               if (dic->di_flags & XFS_DIFLAG_REALTIME)
+                       type = DBM_RTDATA;
+               else if (id->ino == mp->m_sb.sb_rbmino) {
+                       type = DBM_RTBITMAP;
+                       blkmap = blkmap_alloc(dic->di_nextents);
+                       addlink_inode(id);
+               } else if (id->ino == mp->m_sb.sb_rsumino) {
+                       type = DBM_RTSUM;
+                       blkmap = blkmap_alloc(dic->di_nextents);
+                       addlink_inode(id);
+               }
+               else if (id->ino == mp->m_sb.sb_uquotino ||
+                        id->ino == mp->m_sb.sb_pquotino) {
+                       type = DBM_QUOTA;
+                       blkmap = blkmap_alloc(dic->di_nextents);
+                       addlink_inode(id);
+               }
+               else
+                       type = DBM_DATA;
+               if (dic->di_mode & (ISUID | ISGID))
+                       security = 1;
+               break;
+       case IFLNK:
+               type = DBM_SYMLINK;
+               break;
+       default:
+               security = 1;
+               type = DBM_UNKNOWN;
+               break;
+       }
+       if (dic->di_version == XFS_DINODE_VERSION_1)
+               setlink_inode(id, dic->di_onlink, type == DBM_DIR, security);
+       else {
+               sbversion |= XFS_SB_VERSION_NLINKBIT;
+               setlink_inode(id, dic->di_nlink, type == DBM_DIR, security);
+       }
+       /* walk the data fork */
+       switch (dic->di_format) {
+       case XFS_DINODE_FMT_LOCAL:
+               process_lclinode(id, dip, type, &totdblocks, &totiblocks,
+                       &nextents, &blkmap, XFS_DATA_FORK);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               process_exinode(id, dip, type, &totdblocks, &totiblocks,
+                       &nextents, &blkmap, XFS_DATA_FORK);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               process_btinode(id, dip, type, &totdblocks, &totiblocks,
+                       &nextents, &blkmap, XFS_DATA_FORK);
+               break;
+       }
+       /* walk the attribute fork, if the inode has one */
+       if (XFS_DFORK_Q_ARCH(dip, ARCH_NOCONVERT)) {
+               sbversion |= XFS_SB_VERSION_ATTRBIT;
+               switch (dic->di_aformat) {
+               case XFS_DINODE_FMT_LOCAL:
+                       process_lclinode(id, dip, DBM_ATTR, &atotdblocks,
+                               &atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+                       break;
+               case XFS_DINODE_FMT_EXTENTS:
+                       process_exinode(id, dip, DBM_ATTR, &atotdblocks,
+                               &atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+                       break;
+               case XFS_DINODE_FMT_BTREE:
+                       process_btinode(id, dip, DBM_ATTR, &atotdblocks,
+                               &atotiblocks, &anextents, NULL, XFS_ATTR_FORK);
+                       break;
+               }
+       }
+       if (qpdo || qudo) {
+               switch (type) {
+               case DBM_DATA:
+               case DBM_DIR:
+               case DBM_RTBITMAP:
+               case DBM_RTSUM:
+               case DBM_SYMLINK:
+               case DBM_UNKNOWN:
+                       bc = totdblocks + totiblocks +
+                            atotdblocks + atotiblocks;
+                       ic = 1;
+                       break;
+               case DBM_RTDATA:
+                       /* data blocks of a realtime file are counted in rc */
+                       bc = totiblocks + atotdblocks + atotiblocks;
+                       rc = totdblocks;
+                       ic = 1;
+                       break;
+               default:
+                       /* other types (e.g. DBM_QUOTA) are not accounted */
+                       break;
+               }
+               if (ic)
+                       quota_add(dic->di_version >= XFS_DINODE_VERSION_2 ?
+                                       dic->di_projid : -1,
+                                 dic->di_uid, 0, bc, ic, rc);
+       }
+       /* compare what was counted with what the inode core records */
+       totblocks = totdblocks + totiblocks + atotdblocks + atotiblocks;
+       if (totblocks != dic->di_nblocks) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad nblocks %lld for inode %lld, counted "
+                                "%lld\n",
+                               dic->di_nblocks, id->ino, totblocks);
+               error++;
+       }
+       if (nextents != dic->di_nextents) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad nextents %d for inode %lld, counted %d\n",
+                               dic->di_nextents, id->ino, nextents);
+               error++;
+       }
+       if (anextents != dic->di_anextents) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad anextents %d for inode %lld, counted "
+                                "%d\n",
+                               dic->di_anextents, id->ino, anextents);
+               error++;
+       }
+       if (type == DBM_DIR)
+               process_dir(dip, blkmap, id);
+       else if (type == DBM_RTBITMAP)
+               process_rtbitmap(blkmap);
+       else if (type == DBM_RTSUM)
+               process_rtsummary(blkmap);
+       /*
+        * If the CHKD flag is not set, this can legitimately contain garbage;
+        * xfs_repair may have cleared that bit.
+        */
+       else if (type == DBM_QUOTA && (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD))
+               process_quota(id->ino == mp->m_sb.sb_pquotino, id, blkmap);
+       if (blkmap)
+               blkmap_free(blkmap);
+}
+
+/*
+ * Check one fork of an inode whose fork contents are stored inline.
+ *
+ * Data fork: di_size must not exceed the space available to the data fork.
+ * Attr fork: the shortform attribute header's totsize must not exceed the
+ * space available to the attribute fork.
+ * A violation bumps the global error count.  type, totd, toti, nex and
+ * blkmapp are not used here.
+ */
+/* ARGSUSED */
+static void
+process_lclinode(
+       inodata_t               *id,
+       xfs_dinode_t            *dip,
+       dbm_t                   type,
+       xfs_drfsbno_t           *totd,
+       xfs_drfsbno_t           *toti,
+       xfs_extnum_t            *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork)
+{
+       xfs_attr_shortform_t    *sf;
+       xfs_fsblock_t           fsbno = XFS_INO_TO_FSB(mp, id->ino);
+
+       if (whichfork == XFS_DATA_FORK) {
+               if (dip->di_core.di_size <=
+                   XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT))
+                       return;
+               if (!sflag || id->ilist || CHECK_BLIST(fsbno))
+                       dbprintf("local inode %lld data is too large (size "
+                                "%lld)\n",
+                               id->ino, dip->di_core.di_size);
+               error++;
+               return;
+       }
+       if (whichfork != XFS_ATTR_FORK)
+               return;
+
+       sf = (xfs_attr_shortform_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT);
+       if (INT_GET(sf->hdr.totsize, ARCH_CONVERT) <=
+           XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_NOCONVERT))
+               return;
+       if (!sflag || id->ilist || CHECK_BLIST(fsbno))
+               dbprintf("local inode %lld attr is too large "
+                        "(size %d)\n",
+                       id->ino, INT_GET(sf->hdr.totsize, ARCH_CONVERT));
+       error++;
+}
+
+/*
+ * Check a version 1 directory that consists of a single leaf block.
+ *
+ * File block 0 is looked up in blkmap, read onto the I/O cursor stack and
+ * handed to process_leaf_dir_v1_int(), whose result (the parent inode
+ * number found in the block) is returned.  If block 0 is not mapped or
+ * cannot be read, the global error count is bumped and 0 is returned.
+ *
+ * The cursor pushed here is popped on every path that pushed it,
+ * including the read-failure path, so the cursor stack stays balanced.
+ */
+static xfs_ino_t
+process_leaf_dir_v1(
+       blkmap_t        *blkmap,
+       int             *dot,
+       int             *dotdot,
+       inodata_t       *id)
+{
+       xfs_fsblock_t   bno;
+       xfs_ino_t       parent;
+
+       bno = blkmap_get(blkmap, 0);
+       if (bno == NULLFSBLOCK) {
+               if (!sflag || id->ilist)
+                       dbprintf("block 0 for directory inode %lld is "
+                                "missing\n",
+                               id->ino);
+               error++;
+               return 0;
+       }
+       push_cur();
+       set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb, DB_RING_IGN,
+               NULL);
+       if (iocur_top->data == NULL) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("can't read block 0 for directory inode "
+                                "%lld\n",
+                               id->ino);
+               error++;
+               /* undo the push_cur() above before bailing out */
+               pop_cur();
+               return 0;
+       }
+       parent = process_leaf_dir_v1_int(dot, dotdot, id);
+       pop_cur();
+       return parent;
+}
+
+/*
+ * Walk the entries of the version 1 directory leaf block currently on top
+ * of the I/O cursor stack.
+ *
+ * Every entry's inode is looked up and, when found, gets a link added;
+ * an entry whose inode is not found counts as an error.  Entries are then
+ * handled by name:
+ *   ".."  - bumps *dotdot; the first one supplies the return value, any
+ *           further one counts as an error
+ *   "."   - bumps *dot; must refer to the directory itself
+ *   other - records this directory as the child's parent (if the child has
+ *           none yet) and records the name on the child
+ *
+ * Returns the inode number of the first ".." entry (NULLFSINO if that
+ * entry's inode was not found), 0 if the block has no ".." entry, or
+ * NULLFSINO if the block does not carry the directory leaf magic number.
+ */
+static xfs_ino_t
+process_leaf_dir_v1_int(
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id)
+{
+       xfs_dir_leafblock_t     *lblk = iocur_top->data;
+       xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, iocur_top->bb);
+       xfs_dir_leaf_entry_t    *ent;
+       xfs_dir_leaf_name_t     *nm;
+       inodata_t               *child;
+       xfs_ino_t               ino;
+       xfs_ino_t               dotdot_ino = 0;
+       int                     idx;
+       int                     chatty;
+
+       chatty = verbose || id->ilist || CHECK_BLIST(fsbno);
+       if (INT_GET(lblk->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+               if (!sflag || id->ilist || CHECK_BLIST(fsbno))
+                       dbprintf("bad directory leaf magic # %#x for dir ino "
+                                "%lld\n",
+                               INT_GET(lblk->hdr.info.magic, ARCH_CONVERT), id->ino);
+               error++;
+               return NULLFSINO;
+       }
+
+       for (idx = 0, ent = &lblk->entries[0];
+            idx < INT_GET(lblk->hdr.count, ARCH_CONVERT);
+            idx++, ent++) {
+               nm = XFS_DIR_LEAF_NAMESTRUCT(lblk, INT_GET(ent->nameidx, ARCH_CONVERT));
+               ino = DIRINO_GET_ARCH(&nm->inumber, ARCH_CONVERT);
+               child = find_inode(ino, 1);
+               if (chatty)
+                       dbprintf("dir %lld entry %*.*s %lld\n", id->ino,
+                               ent->namelen, ent->namelen, nm->name,
+                               ino);
+               if (child == NULL) {
+                       if (!sflag)
+                               dbprintf("dir %lld entry %*.*s bad inode "
+                                        "number %lld\n",
+                                       id->ino, ent->namelen, ent->namelen,
+                                       nm->name, ino);
+                       error++;
+               } else
+                       addlink_inode(child);
+
+               /* ".." entry */
+               if (ent->namelen == 2 && nm->name[0] == '.' &&
+                   nm->name[1] == '.') {
+                       if (dotdot_ino == 0)
+                               dotdot_ino = child ? ino : NULLFSINO;
+                       else {
+                               if (!sflag || id->ilist || CHECK_BLIST(fsbno))
+                                       dbprintf("multiple .. entries in dir "
+                                                "%lld (%lld, %lld)\n",
+                                               id->ino, dotdot_ino, ino);
+                               error++;
+                       }
+                       (*dotdot)++;
+                       continue;
+               }
+
+               /* "." entry */
+               if (ent->namelen == 1 && nm->name[0] == '.') {
+                       if (ino != id->ino) {
+                               if (!sflag)
+                                       dbprintf("dir %lld entry . inode "
+                                                "number mismatch (%lld)\n",
+                                               id->ino, ino);
+                               error++;
+                       }
+                       (*dot)++;
+                       continue;
+               }
+
+               /* any other name */
+               if (child != NULL) {
+                       if (!child->parent)
+                               child->parent = id;
+                       addname_inode(child, (char *)nm->name,
+                               ent->namelen);
+               }
+       }
+       return dotdot_ino;
+}
+
+/*
+ * Check a version 2 directory in leaf or node form.
+ *
+ * Every mapped directory block is read in turn and dispatched by its file
+ * offset: below m_dirleafblk it is a data block, below m_dirfreeblk a
+ * leaf/node block, otherwise a free-index block.  A table of per-data-block
+ * free values (freetab) is filled by the data block checker and consumed
+ * by the leaf and free-index checkers; entries still set once all blocks
+ * have been processed are reported as missing free indexes.  Name hashes
+ * collected on the way are verified with dir_hash_check().
+ *
+ * dirsize is the directory's size in bytes and sizes the initial free
+ * table.  It comes from the on-disk inode, so the allocation can fail for
+ * a corrupt inode; that is reported as an error rather than dereferencing
+ * a NULL table.
+ *
+ * Returns the parent inode number reported by the data block checker (the
+ * first nonzero one), or 0 if none was found or the table could not be
+ * allocated.
+ */
+static xfs_ino_t
+process_leaf_node_dir_v2(
+       blkmap_t                *blkmap,
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id,
+       xfs_fsize_t             dirsize)
+{
+       xfs_fsblock_t           b;
+       bbmap_t                 bbmap;
+       bmap_ext_t              *bmp;
+       xfs_fileoff_t           dbno;
+       freetab_t               *freetab;
+       int                     i;
+       xfs_ino_t               lino;
+       int                     nex;
+       xfs_ino_t               parent;
+       int                     t;
+       int                     v;
+       int                     v2;
+       int                     x;
+
+       v2 = verbose || id->ilist;
+       v = parent = 0;
+       dbno = NULLFILEOFF;
+       freetab = malloc(FREETAB_SIZE(dirsize / mp->m_dirblksize));
+       if (freetab == NULL) {
+               dbprintf("out of memory checking directory inode %lld\n",
+                       id->ino);
+               error++;
+               return 0;
+       }
+       freetab->naents = (int)(dirsize / mp->m_dirblksize);
+       freetab->nents = 0;
+       for (i = 0; i < freetab->naents; i++)
+               freetab->ents[i] = NULLDATAOFF;
+       dir_hash_init();
+       while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) {
+               nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp);
+               ASSERT(nex > 0);
+               /* be verbose if any fs block of this dir block is listed */
+               for (v = v2, x = 0; !v && x < nex; x++) {
+                       for (b = bmp[x].startblock;
+                            !v && b < bmp[x].startblock + bmp[x].blockcount;
+                            b++)
+                               v = CHECK_BLIST(b);
+               }
+               if (v)
+                       dbprintf("dir inode %lld block %u=%llu\n", id->ino,
+                               (__uint32_t)dbno,
+                               (xfs_dfsbno_t)bmp->startblock);
+               push_cur();
+               /* a directory block made of several extents needs a map */
+               if (nex > 1)
+                       make_bbmap(&bbmap, nex, bmp);
+               set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock),
+                       mp->m_dirblkfsbs * blkbb, DB_RING_IGN,
+                       nex > 1 ? &bbmap : NULL);
+               free(bmp);
+               if (iocur_top->data == NULL) {
+                       if (!sflag || v)
+                               dbprintf("can't read block %u for directory "
+                                        "inode %lld\n",
+                                       (__uint32_t)dbno, id->ino);
+                       error++;
+                       pop_cur();
+                       /* step to the last fs block of this dir block */
+                       dbno += mp->m_dirblkfsbs - 1;
+                       continue;
+               }
+               if (dbno < mp->m_dirleafblk) {
+                       /* freetab is passed by address: callee may replace it */
+                       lino = process_data_dir_v2(dot, dotdot, id, v,
+                               (xfs_dablk_t)dbno, &freetab);
+                       if (lino) {
+                               if (parent) {
+                                       if (!sflag || v)
+                                               dbprintf("multiple .. entries "
+                                                        "in dir %lld\n",
+                                                       id->ino);
+                                       error++;
+                               } else
+                                       parent = lino;
+                       }
+               } else if (dbno < mp->m_dirfreeblk) {
+                       process_leaf_node_dir_v2_int(id, v, (xfs_dablk_t)dbno,
+                               freetab);
+               } else {
+                       process_leaf_node_dir_v2_free(id, v, (xfs_dablk_t)dbno,
+                               freetab);
+               }
+               pop_cur();
+               /* step to the last fs block of this dir block */
+               dbno += mp->m_dirblkfsbs - 1;
+       }
+       dir_hash_check(id, v);
+       dir_hash_done();
+       /* anything left in the table was never matched by a free index */
+       for (i = 0; i < freetab->nents; i++) {
+               if (freetab->ents[i] != NULLDATAOFF) {
+                       if (!sflag || v)
+                               dbprintf("missing free index for data block %d "
+                                        "in dir ino %lld\n",
+                                       XFS_DIR2_DB_TO_DA(mp, i), id->ino);
+                       error++;
+               }
+       }
+       free(freetab);
+       return parent;
+}
+
+/*
+ * Check the version 2 directory free-index block currently on top of the
+ * I/O cursor stack against the free table built from the data blocks.
+ *
+ * The header's magic number, firstdb (which must match the block's
+ * position past m_dirfreeblk) and nvalid/nused (which must lie in
+ * 0..XFS_DIR2_MAX_FREE_BESTS with nused <= nvalid) are validated first;
+ * any failure there ends the check of this block.  Then each of the nvalid
+ * best-free values is compared with the matching free table entry
+ * (NULLDATAOFF when the table has no such entry), matched table entries
+ * are reset to NULLDATAOFF, and the number of non-null values is compared
+ * with nused.  Each mismatch bumps the global error count.
+ */
+static void
+process_leaf_node_dir_v2_free(
+       inodata_t               *id,
+       int                     v,
+       xfs_dablk_t             dabno,
+       freetab_t               *freetab)
+{
+       xfs_dir2_free_t         *fblk = iocur_top->data;
+       xfs_dir2_data_off_t     want;
+       int                     limit;
+       int                     n;
+       int                     inuse = 0;
+
+       if (INT_GET(fblk->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC) {
+               if (!sflag || v)
+                       dbprintf("bad free block magic # %#x for dir ino %lld "
+                                "block %d\n",
+                               INT_GET(fblk->hdr.magic, ARCH_CONVERT), id->ino, dabno);
+               error++;
+               return;
+       }
+
+       limit = XFS_DIR2_MAX_FREE_BESTS(mp);
+       if (INT_GET(fblk->hdr.firstdb, ARCH_CONVERT) !=
+           XFS_DIR2_DA_TO_DB(mp, dabno - mp->m_dirfreeblk) * limit) {
+               if (!sflag || v)
+                       dbprintf("bad free block firstdb %d for dir ino %lld "
+                                "block %d\n",
+                               INT_GET(fblk->hdr.firstdb, ARCH_CONVERT), id->ino, dabno);
+               error++;
+               return;
+       }
+
+       if (INT_GET(fblk->hdr.nvalid, ARCH_CONVERT) < 0 ||
+           INT_GET(fblk->hdr.nvalid, ARCH_CONVERT) > limit ||
+           INT_GET(fblk->hdr.nused, ARCH_CONVERT) < 0 ||
+           INT_GET(fblk->hdr.nused, ARCH_CONVERT) > limit ||
+           INT_GET(fblk->hdr.nused, ARCH_CONVERT) >
+           INT_GET(fblk->hdr.nvalid, ARCH_CONVERT)) {
+               if (!sflag || v)
+                       dbprintf("bad free block nvalid/nused %d/%d for dir "
+                                "ino %lld block %d\n",
+                               INT_GET(fblk->hdr.nvalid, ARCH_CONVERT),
+                               INT_GET(fblk->hdr.nused, ARCH_CONVERT),
+                               id->ino, dabno);
+               error++;
+               return;
+       }
+
+       for (n = 0; n < INT_GET(fblk->hdr.nvalid, ARCH_CONVERT); n++) {
+               /* expected value: the table entry, or null if out of range */
+               if (freetab->nents > INT_GET(fblk->hdr.firstdb, ARCH_CONVERT) + n)
+                       want = freetab->ents[INT_GET(fblk->hdr.firstdb, ARCH_CONVERT) + n];
+               else
+                       want = NULLDATAOFF;
+               if (want != INT_GET(fblk->bests[n], ARCH_CONVERT)) {
+                       if (!sflag || v)
+                               dbprintf("bad free block ent %d is %d should "
+                                        "be %d for dir ino %lld block %d\n",
+                                       n, INT_GET(fblk->bests[n], ARCH_CONVERT),
+                                       want, id->ino, dabno);
+                       error++;
+               }
+               if (INT_GET(fblk->bests[n], ARCH_CONVERT) != NULLDATAOFF)
+                       inuse++;
+               /* mark the table entry as accounted for */
+               if (want != NULLDATAOFF)
+                       freetab->ents[INT_GET(fblk->hdr.firstdb, ARCH_CONVERT) + n] = NULLDATAOFF;
+       }
+
+       if (inuse != INT_GET(fblk->hdr.nused, ARCH_CONVERT)) {
+               if (!sflag || v)
+                       dbprintf("bad free block nused %d should be %d for dir "
+                                "ino %lld block %d\n",
+                               INT_GET(fblk->hdr.nused, ARCH_CONVERT), inuse, id->ino, dabno);
+               error++;
+       }
+}
+
+static void /*
+ * Check the version 2 directory leaf or node block currently on top of the
+ * I/O cursor stack.  The block is handled according to its magic number:
+ *   XFS_DIR2_LEAF1_MAGIC - sibling pointers must be zero, the block must
+ *                          sit at m_dirleafblk, and its bests array is
+ *                          compared with (and clears) the free table
+ *   XFS_DIR2_LEAFN_MAGIC - no header checks
+ *   XFS_DA_NODE_MAGIC    - only the level is range-checked, then return
+ *   anything else        - counted as an error, then return
+ * For both leaf kinds the leaf entries are then scanned: entries with a
+ * null address are counted as stale, every other entry is passed to
+ * dir_hash_see(), and the stale count is compared with the header.
+ * id, v and dabno are used for reporting; v forces messages even when
+ * sflag is set.
+ */
+process_leaf_node_dir_v2_int(
+       inodata_t               *id,
+       int                     v,
+       xfs_dablk_t             dabno,
+       freetab_t               *freetab)
+{
+       int                     i;
+       xfs_dir2_data_off_t     *lbp;
+       xfs_dir2_leaf_t         *leaf;
+       xfs_dir2_leaf_entry_t   *lep;
+       xfs_dir2_leaf_tail_t    *ltp;
+       xfs_da_intnode_t        *node;
+       int                     stale;
+
+       leaf = iocur_top->data;
+       switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) {
+       case XFS_DIR2_LEAF1_MAGIC:
+               if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) || INT_GET(leaf->hdr.info.back, ARCH_CONVERT)) {
+                       if (!sflag || v)
+                               dbprintf("bad leaf block forw/back pointers "
+                                        "%d/%d for dir ino %lld block %d\n",
+                                       INT_GET(leaf->hdr.info.forw, ARCH_CONVERT),
+                                       INT_GET(leaf->hdr.info.back, ARCH_CONVERT), id->ino, dabno);
+                       error++;
+               }
+               if (dabno != mp->m_dirleafblk) {
+                       if (!sflag || v)
+                               dbprintf("single leaf block for dir ino %lld "
+                                        "block %d should be at block %d\n",
+                                       id->ino, dabno,
+                                       (xfs_dablk_t)mp->m_dirleafblk);
+                       error++;
+               }
+               ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+               lbp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+               for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
+                       if (freetab->nents <= i || freetab->ents[i] != INT_GET(lbp[i], ARCH_CONVERT)) {
+                               /*
+                                * NOTE(review): unlike every other mismatch
+                                * in this function, this one prints without
+                                * bumping error - confirm that is intended.
+                                */
+                               if (!sflag || v)
+                                       dbprintf("bestfree %d for dir ino %lld "
+                                                "block %d doesn't match table "
+                                                "value %d\n",
+                                               freetab->nents <= i ?
+                                                       NULLDATAOFF :
+                                                       freetab->ents[i],
+                                               id->ino,
+                                               XFS_DIR2_DB_TO_DA(mp, i),
+                                               INT_GET(lbp[i], ARCH_CONVERT));
+                       }
+                       /* mark the table entry as accounted for */
+                       if (freetab->nents > i)
+                               freetab->ents[i] = NULLDATAOFF;
+               }
+               break;
+       case XFS_DIR2_LEAFN_MAGIC:
+               /*
+                * XXX: if the block is at the root location we could also
+                * check that its sibling pointers are null.
+                */
+               break;
+       case XFS_DA_NODE_MAGIC:
+               node = iocur_top->data;
+               if (INT_GET(node->hdr.level, ARCH_CONVERT) < 1 ||
+                   INT_GET(node->hdr.level, ARCH_CONVERT) > XFS_DA_NODE_MAXDEPTH) {
+                       if (!sflag || v)
+                               dbprintf("bad node block level %d for dir ino "
+                                        "%lld block %d\n",
+                                       INT_GET(node->hdr.level, ARCH_CONVERT), id->ino, dabno);
+                       error++;
+               }
+               /* node blocks have no leaf entries to scan */
+               return;
+       default:
+               if (!sflag || v)
+                       dbprintf("bad directory data magic # %#x for dir ino "
+                                "%lld block %d\n",
+                               INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), id->ino, dabno);
+               error++;
+               return;
+       }
+       /* only the two leaf kinds reach this point */
+       lep = leaf->ents;
+       for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
+               if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       stale++;
+               else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) {
+                       /* nonzero from dir_hash_see() is reported as an extra entry */
+                       if (!sflag || v)
+                               dbprintf("dir %lld block %d extra leaf entry "
+                                        "%x %x\n",
+                                       id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT),
+                                       INT_GET(lep[i].address, ARCH_CONVERT));
+                       error++;
+               }
+       }
+       if (stale != INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
+               if (!sflag || v)
+                       dbprintf("dir %lld block %d stale mismatch "
+                                "%d/%d\n",
+                                id->ino, dabno, stale,
+                                INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+               error++;
+       }
+}
+
+static xfs_ino_t
+process_node_dir_v1(
+       blkmap_t                *blkmap,
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id)
+{
+       xfs_fsblock_t           bno;
+       xfs_fileoff_t           dbno;
+       xfs_ino_t               lino;
+       xfs_da_intnode_t        *node;
+       xfs_ino_t               parent;
+       int                     t;
+       int                     v;
+       int                     v2;
+
+       v = verbose || id->ilist;
+       parent = 0;
+       dbno = NULLFILEOFF;
+       while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) {
+               bno = blkmap_get(blkmap, dbno);
+               v2 = bno != NULLFSBLOCK && CHECK_BLIST(bno);
+               if (bno == NULLFSBLOCK && dbno == 0) {
+                       if (!sflag || v)
+                               dbprintf("can't read root block for directory "
+                                        "inode %lld\n",
+                                       id->ino);
+                       error++;
+               }
+               if (v || v2)
+                       dbprintf("dir inode %lld block %u=%llu\n", id->ino,
+                               (__uint32_t)dbno, (xfs_dfsbno_t)bno);
+               if (bno == NULLFSBLOCK)
+                       continue;
+               push_cur();
+               set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+                       DB_RING_IGN, NULL);
+               if ((node = iocur_top->data) == NULL) {
+                       if (!sflag || v || v2)
+                               dbprintf("can't read block %u for directory "
+                                        "inode %lld\n",
+                                       (__uint32_t)dbno, id->ino);
+                       error++;
+                       continue;
+               }
+#if VERS >= V_62
+               if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+#else
+               if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_NODE_MAGIC)
+#endif
+               {
+                       pop_cur();
+                       continue;
+               }
+               lino = process_leaf_dir_v1_int(dot, dotdot, id);
+               if (lino) {
+                       if (parent) {
+                               if (!sflag || v || v2)
+                                       dbprintf("multiple .. entries in dir "
+                                                "%lld\n",
+                                               id->ino);
+                               error++;
+                       } else
+                               parent = lino;
+               }
+               pop_cur();
+       }
+       return parent;
+}
+
+/*
+ * Check every dquot stored in a quota inode.
+ *
+ * isproj selects between the project and the user quota file; it picks
+ * both the name used in messages and the flag value each dquot must
+ * carry.  Each mapped block is read and each of its entries is checked,
+ * in order, for magic number, version, flags and id (the expected id is
+ * the file block offset times the number of dquots per block, plus the
+ * entry index).  The first failing check is reported and counted and the
+ * entry is skipped; entries that pass all checks are fed to quota_add().
+ */
+static void
+process_quota(
+       int             isproj,
+       inodata_t       *id,
+       blkmap_t        *blkmap)
+{
+       xfs_fsblock_t   bno;
+       int             dq_per_blk;
+       xfs_dqblk_t     *dqb;
+       xfs_dqid_t      dqid = 0;
+       int             on_blist;
+       xfs_fileoff_t   qbno;
+       char            *qname;
+       int             report;
+       int             slot;
+       int             t;
+       u_int8_t        want_flags;
+
+       dq_per_blk = (int)(mp->m_sb.sb_blocksize / sizeof(*dqb));
+       if (isproj) {
+               qname = "project";
+               want_flags = XFS_DQ_PROJ;
+       } else {
+               qname = "user";
+               want_flags = XFS_DQ_USER;
+       }
+       for (qbno = blkmap_next_off(blkmap, NULLFILEOFF, &t);
+            qbno != NULLFILEOFF;
+            qbno = blkmap_next_off(blkmap, qbno, &t)) {
+               bno = blkmap_get(blkmap, qbno);
+               /* id of the first dquot held in this block */
+               dqid = (xfs_dqid_t)qbno * dq_per_blk;
+               on_blist = CHECK_BLIST(bno);
+               report = !sflag || id->ilist || on_blist;
+               push_cur();
+               set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+                       DB_RING_IGN, NULL);
+               dqb = iocur_top->data;
+               if (dqb == NULL) {
+                       pop_cur();
+                       if (report)
+                               dbprintf("can't read block %lld for %s quota "
+                                        "inode (fsblock %lld)\n",
+                                       (xfs_dfiloff_t)qbno, qname,
+                                       (xfs_dfsbno_t)bno);
+                       error++;
+                       continue;
+               }
+               for (slot = 0; slot < dq_per_blk; slot++, dqid++, dqb++) {
+                       if (verbose || id->ilist || on_blist)
+                               dbprintf("%s dqblk %lld entry %d id %d bc "
+                                        "%lld ic %lld rc %lld\n",
+                                       qname, (xfs_dfiloff_t)qbno, slot, dqid,
+                                       INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT),
+                                       INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT),
+                                       INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT));
+                       /* only the first failing check is reported */
+                       if (INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) {
+                               if (report)
+                                       dbprintf("bad magic number %#x for %s "
+                                                "dqblk %lld entry %d id %d\n",
+                                               INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT), qname,
+                                               (xfs_dfiloff_t)qbno, slot, dqid);
+                               error++;
+                       } else if (INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) {
+                               if (report)
+                                       dbprintf("bad version number %#x for "
+                                                "%s dqblk %lld entry %d id "
+                                                "%d\n",
+                                               INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT), qname,
+                                               (xfs_dfiloff_t)qbno, slot, dqid);
+                               error++;
+                       } else if (INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT) != want_flags) {
+                               if (report)
+                                       dbprintf("bad flags %#x for %s dqblk "
+                                                "%lld entry %d id %d\n",
+                                               INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT), qname,
+                                               (xfs_dfiloff_t)qbno, slot, dqid);
+                               error++;
+                       } else if (INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT) != dqid) {
+                               if (report)
+                                       dbprintf("bad id %d for %s dqblk %lld "
+                                                "entry %d id %d\n",
+                                               INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT), qname,
+                                               (xfs_dfiloff_t)qbno, slot, dqid);
+                               error++;
+                       } else if (isproj) {
+                               quota_add(dqid, -1, 1,
+                                         INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT),
+                                         INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT),
+                                         INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT));
+                       } else {
+                               quota_add(-1, dqid, 1,
+                                         INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT),
+                                         INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT),
+                                         INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT));
+                       }
+               }
+               pop_cur();
+       }
+}
+
+/*
+ * Scan the realtime bitmap inode.
+ *
+ * Each set bit marks one realtime extent: its blocks are recorded as
+ * DBM_RTFREE and frextents is incremented.  Runs of consecutive set bits
+ * are tracked across block boundaries; when a run ends (or the scan
+ * finishes inside a run) its length is turned into a summary slot with
+ * XFS_RTBLOCKLOG/XFS_SUMOFFS and that slot of sumcompute is incremented.
+ * Scanning stops once sb_rextents extents have been examined.  Missing
+ * or unreadable bitmap blocks are counted in error.
+ */
+static void
+process_rtbitmap(
+       blkmap_t        *blkmap)
+{
+#define xfs_highbit64 libxfs_highbit64 /* for XFS_RTBLOCKLOG macro */
+       int             bit;
+       int             blockbits;
+       xfs_fileoff_t   bmbno;
+       xfs_fsblock_t   bno;
+       xfs_drtbno_t    extno;
+       int             in_run;
+       int             len;
+       int             log;
+       int             offs;
+       xfs_drfsbno_t   rtbno;
+       int             run_bit;
+       int             run_bmbno;
+       int             t;
+       xfs_rtword_t    *words;
+
+       blockbits = mp->m_sb.sb_blocksize * NBBY;
+       bit = 0;
+       extno = 0;
+       in_run = 0;
+       run_bmbno = 0;
+       run_bit = 0;
+       bmbno = NULLFILEOFF;
+       for (;;) {
+               bmbno = blkmap_next_off(blkmap, bmbno, &t);
+               if (bmbno == NULLFILEOFF)
+                       break;
+               bno = blkmap_get(blkmap, bmbno);
+               if (bno == NULLFSBLOCK) {
+                       if (!sflag)
+                               dbprintf("block %lld for rtbitmap inode is "
+                                        "missing\n",
+                                       (xfs_dfiloff_t)bmbno);
+                       error++;
+                       continue;
+               }
+               push_cur();
+               set_cur(&typtab[TYP_RTBITMAP], XFS_FSB_TO_DADDR(mp, bno), blkbb,
+                       DB_RING_IGN, NULL);
+               words = iocur_top->data;
+               if (words == NULL) {
+                       pop_cur();
+                       if (!sflag)
+                               dbprintf("can't read block %lld for rtbitmap "
+                                        "inode\n",
+                                       (xfs_dfiloff_t)bmbno);
+                       error++;
+                       continue;
+               }
+               for (bit = 0;
+                    bit < blockbits && extno < mp->m_sb.sb_rextents;
+                    bit++, extno++) {
+                       if (isset(words, bit)) {
+                               /* free extent: record it, maybe open a run */
+                               rtbno = extno * mp->m_sb.sb_rextsize;
+                               set_rdbmap(rtbno, mp->m_sb.sb_rextsize,
+                                       DBM_RTFREE);
+                               frextents++;
+                               if (in_run == 0) {
+                                       run_bmbno = (int)bmbno;
+                                       run_bit = bit;
+                                       in_run = 1;
+                               }
+                               continue;
+                       }
+                       if (in_run == 1) {
+                               /* clear bit closes the current run */
+                               len = ((int)bmbno - run_bmbno) *
+                                       blockbits + (bit - run_bit);
+                               log = XFS_RTBLOCKLOG(len);
+                               offs = XFS_SUMOFFS(mp, log, run_bmbno);
+                               sumcompute[offs]++;
+                               in_run = 0;
+                       }
+               }
+               pop_cur();
+               if (extno == mp->m_sb.sb_rextents)
+                       break;
+       }
+       if (in_run == 1) {
+               /* scan ended while still inside a run of free extents */
+               len = ((int)bmbno - run_bmbno) * blockbits +
+                       (bit - run_bit);
+               log = XFS_RTBLOCKLOG(len);
+               offs = XFS_SUMOFFS(mp, log, run_bmbno);
+               sumcompute[offs]++;
+       }
+}
+
+/*
+ * Load the realtime summary inode into the in-memory sumfile buffer.
+ *
+ * Every mapped block is read and copied to sumfile at byte offset
+ * (file block offset * filesystem block size).  Blocks that are missing
+ * from the map or cannot be read are reported (unless sflag is set) and
+ * counted in error.
+ *
+ * NOTE(review): nothing here bounds sumbno against the size of sumfile;
+ * confirm the caller sizes the buffer for every mapped block.
+ */
+static void
+process_rtsummary(
+       blkmap_t        *blkmap)
+{
+       xfs_fsblock_t   bno;
+       char            *bytes;
+       xfs_fileoff_t   sumbno;
+       int             t;
+
+       sumbno = NULLFILEOFF;
+       while ((sumbno = blkmap_next_off(blkmap, sumbno, &t)) !=
+              NULLFILEOFF) {
+               bno = blkmap_get(blkmap, sumbno);
+               if (bno == NULLFSBLOCK) {
+                       if (!sflag)
+                               dbprintf("block %lld for rtsummary inode is "
+                                        "missing\n",
+                                       (xfs_dfiloff_t)sumbno);
+                       error++;
+                       continue;
+               }
+               push_cur();
+               set_cur(&typtab[TYP_RTSUMMARY], XFS_FSB_TO_DADDR(mp, bno),
+                       blkbb, DB_RING_IGN, NULL);
+               if ((bytes = iocur_top->data) == NULL) {
+                       /* balance the push_cur() above before moving on */
+                       pop_cur();
+                       if (!sflag)
+                               dbprintf("can't read block %lld for rtsummary "
+                                        "inode\n",
+                                       (xfs_dfiloff_t)sumbno);
+                       error++;
+                       continue;
+               }
+               memcpy((char *)sumfile + sumbno * mp->m_sb.sb_blocksize, bytes,
+                       mp->m_sb.sb_blocksize);
+               pop_cur();
+       }
+}
+
+/*
+ * Check a version 2 shortform (in-inode) directory.
+ *
+ * Counts the implicit "." entry, then walks the packed entries: each
+ * entry must fit inside di_size, must name an inode find_inode() knows,
+ * and must not have an offset lower than the end of the previous entry.
+ * Known child inodes get a link, a parent (if they have none yet) and a
+ * name recorded.  After the walk the total size, the highest offset, the
+ * ".." inode and the count of inode numbers above
+ * XFS_DIR2_MAX_SHORT_INUM (compared with hdr.i8count) are verified.
+ *
+ * Returns the ".." inode number, or NULLFSINO if that inode is unknown.
+ */
+static xfs_ino_t
+process_sf_dir_v2(
+       xfs_dinode_t            *dip,
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id)
+{
+       inodata_t               *child;
+       int                     ent_off;
+       int                     left;
+       xfs_ino_t               lino;
+       int                     n8;
+       int                     next_off;
+       xfs_dir2_sf_t           *sf;
+       xfs_dir2_sf_entry_t     *sfe;
+       int                     v;
+
+       sf = &dip->di_u.di_dir2sf;
+       addlink_inode(id);
+       v = verbose || id->ilist;
+       if (v)
+               dbprintf("dir %lld entry . %lld\n", id->ino, id->ino);
+       (*dot)++;
+       sfe = XFS_DIR2_SF_FIRSTENTRY(sf);
+       next_off = XFS_DIR2_DATA_FIRST_OFFSET;
+       n8 = 0;
+       /* "left" reaches 0 only if every entry was walked without a break */
+       left = INT_GET(sf->hdr.count, ARCH_CONVERT);
+       while (left > 0) {
+               if ((__psint_t)sfe + XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sfe) -
+                   (__psint_t)sf > dip->di_core.di_size) {
+                       if (!sflag)
+                               dbprintf("dir %llu bad size in entry at %d\n",
+                                       id->ino,
+                                       (int)((char *)sfe - (char *)sf));
+                       error++;
+                       break;
+               }
+               lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, XFS_DIR2_SF_INUMBERP(sfe), ARCH_CONVERT);
+               if (lino > XFS_DIR2_MAX_SHORT_INUM)
+                       n8++;
+               child = find_inode(lino, 1);
+               if (child != NULL) {
+                       addlink_inode(child);
+                       if (!child->parent)
+                               child->parent = id;
+                       addname_inode(child, (char *)sfe->name, sfe->namelen);
+               } else {
+                       if (!sflag)
+                               dbprintf("dir %lld entry %*.*s bad inode "
+                                        "number %lld\n",
+                                       id->ino, sfe->namelen, sfe->namelen,
+                                       sfe->name, lino);
+                       error++;
+               }
+               ent_off = XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT);
+               if (v)
+                       dbprintf("dir %lld entry %*.*s offset %d %lld\n",
+                               id->ino, sfe->namelen, sfe->namelen, sfe->name,
+                               ent_off, lino);
+               if (ent_off < next_off) {
+                       if (!sflag)
+                               dbprintf("dir %lld entry %*.*s bad offset %d\n",
+                                       id->ino, sfe->namelen, sfe->namelen,
+                                       sfe->name, ent_off);
+                       error++;
+               }
+               next_off = ent_off + XFS_DIR2_DATA_ENTSIZE(sfe->namelen);
+               sfe = XFS_DIR2_SF_NEXTENTRY(sf, sfe);
+               left--;
+       }
+       if (left == 0 && (__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size) {
+               if (!sflag)
+                       dbprintf("dir %llu size is %lld, should be %u\n",
+                               id->ino, dip->di_core.di_size,
+                               (uint)((char *)sfe - (char *)sf));
+               error++;
+       }
+       if (next_off + (INT_GET(sf->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) +
+           sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) {
+               if (!sflag)
+                       dbprintf("dir %llu offsets too high\n", id->ino);
+               error++;
+       }
+       lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, &sf->hdr.parent, ARCH_CONVERT);
+       if (lino > XFS_DIR2_MAX_SHORT_INUM)
+               n8++;
+       child = find_inode(lino, 1);
+       if (child == NULL) {
+               if (!sflag)
+                       dbprintf("dir %lld entry .. bad inode number %lld\n",
+                               id->ino, lino);
+               error++;
+       } else {
+               addlink_inode(child);
+       }
+       if (v)
+               dbprintf("dir %lld entry .. %lld\n", id->ino, lino);
+       if (n8 != sf->hdr.i8count) {
+               if (!sflag)
+                       dbprintf("dir %lld i8count mismatch is %d should be "
+                                "%d\n",
+                               id->ino, sf->hdr.i8count, n8);
+               error++;
+       }
+       (*dotdot)++;
+       if (child == NULL)
+               return NULLFSINO;
+       return lino;
+}
+
+/*
+ * Check a version 1 shortform (in-inode) directory.
+ *
+ * Counts the implicit "." entry, then walks hdr.count packed entries.
+ * Each entry whose inode is known to find_inode() gets a link, a parent
+ * (if it has none yet) and a name recorded; unknown inodes are reported
+ * and counted in error.  After the walk the space consumed by the entries
+ * is compared with di_size and the ".." inode is looked up.
+ *
+ * The size mismatch is reported only when sflag is clear and is counted
+ * in error, matching every other check in this function and the
+ * equivalent check in process_sf_dir_v2().
+ *
+ * Returns the ".." inode number, or NULLFSINO if that inode is unknown.
+ */
+static xfs_ino_t
+process_shortform_dir_v1(
+       xfs_dinode_t            *dip,
+       int                     *dot,
+       int                     *dotdot,
+       inodata_t               *id)
+{
+       inodata_t               *cid;
+       int                     i;
+       xfs_ino_t               lino;
+       xfs_dir_shortform_t     *sf;
+       xfs_dir_sf_entry_t      *sfe;
+       int                     v;
+
+       sf = &dip->di_u.di_dirsf;
+       addlink_inode(id);
+       v = verbose || id->ilist;
+       if (v)
+               dbprintf("dir %lld entry . %lld\n", id->ino, id->ino);
+       (*dot)++;
+       sfe = &sf->list[0];
+       for (i = INT_GET(sf->hdr.count, ARCH_CONVERT) - 1; i >= 0; i--) {
+               lino = DIRINO_GET_ARCH(&sfe->inumber, ARCH_CONVERT);
+               cid = find_inode(lino, 1);
+               if (cid == NULL) {
+                       if (!sflag)
+                               dbprintf("dir %lld entry %*.*s bad inode "
+                                        "number %lld\n",
+                                       id->ino, sfe->namelen, sfe->namelen,
+                                       sfe->name, lino);
+                       error++;
+               } else {
+                       addlink_inode(cid);
+                       if (!cid->parent)
+                               cid->parent = id;
+                       addname_inode(cid, (char *)sfe->name, sfe->namelen);
+               }
+               if (v)
+                       dbprintf("dir %lld entry %*.*s %lld\n", id->ino,
+                               sfe->namelen, sfe->namelen, sfe->name, lino);
+               sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+       }
+       if ((__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size) {
+               /* an inconsistency like any other: honour sflag, count it */
+               if (!sflag)
+                       dbprintf("dir %llu size is %lld, should be %d\n",
+                               id->ino, dip->di_core.di_size,
+                               (int)((char *)sfe - (char *)sf));
+               error++;
+       }
+       lino = DIRINO_GET_ARCH(&sf->hdr.parent, ARCH_CONVERT);
+       cid = find_inode(lino, 1);
+       if (cid)
+               addlink_inode(cid);
+       else {
+               if (!sflag)
+                       dbprintf("dir %lld entry .. bad inode number %lld\n",
+                               id->ino, lino);
+               error++;
+       }
+       if (v)
+               dbprintf("dir %lld entry .. %lld\n", id->ino, lino);
+       (*dotdot)++;
+       return cid ? lino : NULLFSINO;
+}
+
+/*
+ * Add block/inode/realtime counts to the user and/or project quota
+ * tables.  An id of -1 means "no such id"; a table is only touched when
+ * its enable flag (qudo for users, qpdo for projects) is set.  dq is
+ * passed through to quota_add1() to select which counter set is updated.
+ */
+static void
+quota_add(
+       xfs_dqid_t      projid,
+       xfs_dqid_t      userid,
+       int             dq,
+       xfs_qcnt_t      bc,
+       xfs_qcnt_t      ic,
+       xfs_qcnt_t      rc)
+{
+       if (qudo) {
+               if (userid != -1)
+                       quota_add1(qudata, userid, dq, bc, ic, rc);
+       }
+       if (qpdo) {
+               if (projid != -1)
+                       quota_add1(qpdata, projid, dq, bc, ic, rc);
+       }
+}
+
+/*
+ * Accumulate counts for one id in a quota hash table.
+ *
+ * The table is an array of QDATA_HASH_SIZE chains indexed by
+ * id % QDATA_HASH_SIZE.  If the id is already present the counts are
+ * added to its "dq" counters (dq non-zero) or its "count" counters
+ * (dq zero).  Otherwise a new entry is allocated, the selected counters
+ * are set to the given values, the other set is zeroed, and the entry is
+ * pushed on the front of its chain.
+ */
+static void
+quota_add1(
+       qdata_t         **qt,
+       xfs_dqid_t      id,
+       int             dq,
+       xfs_qcnt_t      bc,
+       xfs_qcnt_t      ic,
+       xfs_qcnt_t      rc)
+{
+       int             bucket;
+       qdata_t         *ent;
+       qinfo_t         *tally;
+
+       bucket = (int)((__uint32_t)id % QDATA_HASH_SIZE);
+       for (ent = qt[bucket]; ent; ent = ent->next) {
+               if (ent->id != id)
+                       continue;
+               tally = dq ? &ent->dq : &ent->count;
+               tally->bc += bc;
+               tally->ic += ic;
+               tally->rc += rc;
+               return;
+       }
+       /* first sighting of this id: start both counter sets at zero */
+       ent = xmalloc(sizeof(*ent));
+       ent->id = id;
+       ent->count.bc = ent->count.ic = ent->count.rc = 0;
+       ent->dq.bc = ent->dq.ic = ent->dq.rc = 0;
+       tally = dq ? &ent->dq : &ent->count;
+       tally->bc = bc;
+       tally->ic = ic;
+       tally->rc = rc;
+       ent->next = qt[bucket];
+       qt[bucket] = ent;
+}
+
+/*
+ * Compare the two counter sets of every entry in a quota hash table and
+ * then free the table.
+ *
+ * For each entry whose "count" and "dq" counters differ in bc, ic or rc,
+ * one line listing the differing pairs is printed (unless sflag is set)
+ * and error is incremented.  Every entry, and finally the table itself,
+ * is released with xfree().  s is the label printed in front of each
+ * message.
+ */
+static void
+quota_check(
+       char    *s,
+       qdata_t **qt)
+{
+       int     bc_bad;
+       int     bucket;
+       qdata_t *ent;
+       int     ic_bad;
+       qdata_t *following;
+       int     rc_bad;
+
+       for (bucket = 0; bucket < QDATA_HASH_SIZE; bucket++) {
+               for (ent = qt[bucket]; ent; ent = following) {
+                       /* grab the link before the entry is freed */
+                       following = ent->next;
+                       bc_bad = ent->count.bc != ent->dq.bc;
+                       ic_bad = ent->count.ic != ent->dq.ic;
+                       rc_bad = ent->count.rc != ent->dq.rc;
+                       if (bc_bad || ic_bad || rc_bad) {
+                               if (!sflag) {
+                                       dbprintf("%s quota id %d, have/exp",
+                                               s, ent->id);
+                                       if (bc_bad)
+                                               dbprintf(" bc %lld/%lld",
+                                                       ent->dq.bc,
+                                                       ent->count.bc);
+                                       if (ic_bad)
+                                               dbprintf(" ic %lld/%lld",
+                                                       ent->dq.ic,
+                                                       ent->count.ic);
+                                       if (rc_bad)
+                                               dbprintf(" rc %lld/%lld",
+                                                       ent->dq.rc,
+                                                       ent->count.rc);
+                                       dbprintf("\n");
+                               }
+                               error++;
+                       }
+                       xfree(ent);
+               }
+       }
+       xfree(qt);
+}
+
+/*
+ * Decide whether user and project quota checking should be done and
+ * allocate the corresponding hash tables.
+ *
+ * A quota type is checked only when the superblock names a quota inode
+ * for it (neither 0 nor NULLFSINO) and the matching *_CHKD bit is set in
+ * sb_qflags.
+ */
+static void
+quota_init(void)
+{
+       qudo = 0;
+       if (mp->m_sb.sb_uquotino != 0 &&
+           mp->m_sb.sb_uquotino != NULLFSINO)
+               qudo = (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) != 0;
+
+       qpdo = 0;
+       if (mp->m_sb.sb_pquotino != 0 &&
+           mp->m_sb.sb_pquotino != NULLFSINO)
+               qpdo = (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) != 0;
+
+       if (qudo)
+               qudata = xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *));
+       if (qpdo)
+               qpdata = xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *));
+}
+
+/*
+ * Check the header blocks and btrees of one allocation group.
+ *
+ * Reads the AG's superblock copy, AGF and AGI in turn (each on its own
+ * cursor-stack entry), validates their magic and version numbers,
+ * records the header blocks in the block map, scans the free list and
+ * the bno, cnt and inode btrees, and finally compares the counters kept
+ * in the AGF/AGI with the totals gathered during the scans.  Any
+ * non-empty AGI unlinked bucket is reported as an error.
+ *
+ * Failure to read one of the three headers is a serious error and ends
+ * the scan of this AG; the labels at the bottom unwind exactly as many
+ * cursor-stack entries as were pushed.
+ */
+static void
+scan_ag(
+       xfs_agnumber_t  agno)
+{
+       xfs_agf_t       *agf;
+       xfs_agi_t       *agi;
+       xfs_agino_t     agino;
+       int             bucket;
+       xfs_sb_t        sbuf;
+       xfs_sb_t        *sb = &sbuf;
+
+       agffreeblks = 0;
+       agflongest = 0;
+       agicount = 0;
+       agifreecount = 0;
+
+       /* superblock copy */
+       push_cur();
+       set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1,
+               DB_RING_IGN, NULL);
+       if (iocur_top->data == NULL) {
+               dbprintf("can't read superblock for ag %u\n", agno);
+               serious_error++;
+               goto pop_sb;
+       }
+       libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+       if (sb->sb_magicnum != XFS_SB_MAGIC) {
+               if (!sflag)
+                       dbprintf("bad sb magic # %#x in ag %u\n",
+                               sb->sb_magicnum, agno);
+               error++;
+       }
+       if (!XFS_SB_GOOD_VERSION(sb)) {
+               if (!sflag)
+                       dbprintf("bad sb version # %#x in ag %u\n",
+                               sb->sb_versionnum, agno);
+               error++;
+               sbver_err++;
+       }
+       if (agno == 0 && sb->sb_inprogress != 0) {
+               if (!sflag)
+                       dbprintf("mkfs not completed successfully\n");
+               error++;
+       }
+       set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp));
+       if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno)
+               set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart),
+                       sb->sb_logblocks, DBM_LOG, agno, XFS_SB_BLOCK(mp));
+
+       /* AGF */
+       push_cur();
+       set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agf = iocur_top->data;
+       if (agf == NULL) {
+               dbprintf("can't read agf block for ag %u\n", agno);
+               serious_error++;
+               goto pop_agf;
+       }
+       if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC) {
+               if (!sflag)
+                       dbprintf("bad agf magic # %#x in ag %u\n",
+                               INT_GET(agf->agf_magicnum, ARCH_CONVERT), agno);
+               error++;
+       }
+       if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT))) {
+               if (!sflag)
+                       dbprintf("bad agf version # %#x in ag %u\n",
+                               INT_GET(agf->agf_versionnum, ARCH_CONVERT), agno);
+               error++;
+       }
+       if (XFS_SB_BLOCK(mp) != XFS_AGF_BLOCK(mp))
+               set_dbmap(agno, XFS_AGF_BLOCK(mp), 1, DBM_AGF, agno,
+                       XFS_SB_BLOCK(mp));
+       /* blocks past the AGF's idea of the AG length are "missing" */
+       if (sb->sb_agblocks > INT_GET(agf->agf_length, ARCH_CONVERT))
+               set_dbmap(agno, INT_GET(agf->agf_length, ARCH_CONVERT),
+                       sb->sb_agblocks - INT_GET(agf->agf_length, ARCH_CONVERT),
+                       DBM_MISSING, agno, XFS_SB_BLOCK(mp));
+
+       /* AGI */
+       push_cur();
+       set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agi = iocur_top->data;
+       if (agi == NULL) {
+               dbprintf("can't read agi block for ag %u\n", agno);
+               serious_error++;
+               goto pop_agi;
+       }
+       if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC) {
+               if (!sflag)
+                       dbprintf("bad agi magic # %#x in ag %u\n",
+                               INT_GET(agi->agi_magicnum, ARCH_CONVERT), agno);
+               error++;
+       }
+       if (!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT))) {
+               if (!sflag)
+                       dbprintf("bad agi version # %#x in ag %u\n",
+                               INT_GET(agi->agi_versionnum, ARCH_CONVERT), agno);
+               error++;
+       }
+       if (XFS_SB_BLOCK(mp) != XFS_AGI_BLOCK(mp) &&
+           XFS_AGF_BLOCK(mp) != XFS_AGI_BLOCK(mp))
+               set_dbmap(agno, XFS_AGI_BLOCK(mp), 1, DBM_AGI, agno,
+                       XFS_SB_BLOCK(mp));
+
+       /* free list and the three per-AG btrees */
+       scan_freelist(agf);
+       fdblocks--;
+       scan_sbtree(agf,
+               INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT),
+               INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT),
+               1, scanfunc_bno, TYP_BNOBT);
+       fdblocks--;
+       scan_sbtree(agf,
+               INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT),
+               INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT),
+               1, scanfunc_cnt, TYP_CNTBT);
+       scan_sbtree(agf,
+               INT_GET(agi->agi_root, ARCH_CONVERT),
+               INT_GET(agi->agi_level, ARCH_CONVERT),
+               1, scanfunc_ino, TYP_INOBT);
+
+       /* header counters versus what the scans actually found */
+       if (INT_GET(agf->agf_freeblks, ARCH_CONVERT) != agffreeblks) {
+               if (!sflag)
+                       dbprintf("agf_freeblks %u, counted %u in ag %u\n",
+                               INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+                               agffreeblks, agno);
+               error++;
+       }
+       if (INT_GET(agf->agf_longest, ARCH_CONVERT) != agflongest) {
+               if (!sflag)
+                       dbprintf("agf_longest %u, counted %u in ag %u\n",
+                               INT_GET(agf->agf_longest, ARCH_CONVERT),
+                               agflongest, agno);
+               error++;
+       }
+       if (INT_GET(agi->agi_count, ARCH_CONVERT) != agicount) {
+               if (!sflag)
+                       dbprintf("agi_count %u, counted %u in ag %u\n",
+                               INT_GET(agi->agi_count, ARCH_CONVERT),
+                               agicount, agno);
+               error++;
+       }
+       if (INT_GET(agi->agi_freecount, ARCH_CONVERT) != agifreecount) {
+               if (!sflag)
+                       dbprintf("agi_freecount %u, counted %u in ag %u\n",
+                               INT_GET(agi->agi_freecount, ARCH_CONVERT),
+                               agifreecount, agno);
+               error++;
+       }
+       for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
+               agino = INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT);
+               if (agino == NULLAGINO)
+                       continue;
+               if (!sflag)
+                       dbprintf("agi unlinked bucket %d is %u in ag "
+                                "%u (inode=%lld)\n", bucket, agino, agno,
+                                XFS_AGINO_TO_INO(mp, agno, agino));
+               error++;
+       }
+
+pop_agi:
+       pop_cur();
+pop_agf:
+       pop_cur();
+pop_sb:
+       pop_cur();
+}
+
+/*
+ * Scan the free list (AGFL) of the allocation group described by agf.
+ *
+ * Records the AGFL block itself in the block map (when it does not share
+ * a block with the SB, AGF or AGI), then walks the circular list from
+ * agf_flfirst to agf_fllast, marking each listed block DBM_FREELIST.
+ * The number of blocks walked is compared with agf_flcount and added to
+ * fdblocks.
+ *
+ * agf_flfirst and agf_fllast come straight from disk, so they are range
+ * checked first: an out-of-range first index would read outside
+ * agfl_bno[], and an out-of-range last index could never be reached by
+ * the wrapping walk, which would then never terminate.
+ */
+static void
+scan_freelist(
+       xfs_agf_t       *agf)
+{
+       xfs_agnumber_t  seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       xfs_agfl_t      *agfl;
+       xfs_agblock_t   bno;
+       uint            count;
+       int             i;
+
+       if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+           XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+           XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
+               set_dbmap(seqno, XFS_AGFL_BLOCK(mp), 1, DBM_AGFL, seqno,
+                       XFS_SB_BLOCK(mp));
+       if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+               return;
+       if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE ||
+           INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE) {
+               if (!sflag)
+                       dbprintf("agf %u freelist blocks bad, skipping "
+                                "freelist scan\n", seqno);
+               error++;
+               return;
+       }
+       push_cur();
+       set_cur(&typtab[TYP_AGFL],
+               XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1, DB_RING_IGN, NULL);
+       if ((agfl = iocur_top->data) == NULL) {
+               dbprintf("can't read agfl block for ag %u\n", seqno);
+               serious_error++;
+               /* balance the push_cur() above before bailing out */
+               pop_cur();
+               return;
+       }
+       i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+       count = 0;
+       for (;;) {
+               bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+               set_dbmap(seqno, bno, 1, DBM_FREELIST, seqno,
+                       XFS_AGFL_BLOCK(mp));
+               count++;
+               if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+                       break;
+               /* the list is circular: wrap at the end of the array */
+               if (++i == XFS_AGFL_SIZE)
+                       i = 0;
+       }
+       if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT)) {
+               if (!sflag)
+                       dbprintf("freeblk count %u != flcount %u in ag %u\n",
+                               count, INT_GET(agf->agf_flcount, ARCH_CONVERT),
+                               seqno);
+               error++;
+       }
+       fdblocks += count;
+       pop_cur();
+}
+
+/*
+ * Read one long-format (filesystem-block addressed) btree block and hand it
+ * to a scan callback.
+ *
+ * root:    filesystem block number of the block to read.
+ * nlevels: levels remaining including this block; the callback is given
+ *          nlevels - 1 as the level it should expect in the block.
+ * func:    callback invoked on the block data.
+ * type, id, totd, toti, nex, blkmapp, isroot, btype: passed through to
+ *          func unchanged; btype also selects the typtab entry used to
+ *          read the block.
+ *
+ * If the block cannot be read the failure is counted in error (and reported
+ * unless sflag is set) and the callback is not invoked.
+ */
+static void
+scan_lbtree(
+       xfs_fsblock_t   root,
+       int             nlevels,
+       scan_lbtree_f_t func,
+       dbm_t           type,
+       inodata_t       *id,
+       xfs_drfsbno_t   *totd,
+       xfs_drfsbno_t   *toti,
+       xfs_extnum_t    *nex,
+       blkmap_t        **blkmapp,
+       int             isroot,
+       typnm_t         btype)
+{
+       push_cur();
+       set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+               NULL);
+       if (iocur_top->data == NULL) {
+               if (!sflag)
+                       dbprintf("can't read btree block %u/%u\n",
+                               XFS_FSB_TO_AGNO(mp, root),
+                               XFS_FSB_TO_AGBNO(mp, root));
+               error++;
+               /* balance the push_cur() above before bailing out */
+               pop_cur();
+               return;
+       }
+       (*func)(iocur_top->data, nlevels - 1, type, root, id, totd, toti, nex,
+               blkmapp, isroot, btype);
+       pop_cur();
+}
+
+/*
+ * Read one short-format (AG-relative) btree block and hand it to a scan
+ * callback.
+ *
+ * agf:     AGF header of the allocation group; agf_seqno supplies the AG
+ *          number used to locate the block.
+ * root:    AG block number of the block to read.
+ * nlevels: levels remaining including this block; the callback is given
+ *          nlevels - 1 as the level it should expect in the block.
+ * isroot:  passed through to func.
+ * func:    callback invoked on the block data.
+ * btype:   selects the typtab entry used to read the block.
+ *
+ * If the block cannot be read the failure is counted in error (and reported
+ * unless sflag is set) and the callback is not invoked.
+ */
+static void
+scan_sbtree(
+       xfs_agf_t       *agf,
+       xfs_agblock_t   root,
+       int             nlevels,
+       int             isroot,
+       scan_sbtree_f_t func,
+       typnm_t         btype)
+{
+       xfs_agnumber_t  seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+       push_cur();
+       set_cur(&typtab[btype],
+               XFS_AGB_TO_DADDR(mp, seqno, root), blkbb, DB_RING_IGN, NULL);
+       if (iocur_top->data == NULL) {
+               if (!sflag)
+                       dbprintf("can't read btree block %u/%u\n", seqno, root);
+               error++;
+               /* balance the push_cur() above before bailing out */
+               pop_cur();
+               return;
+       }
+       (*func)(iocur_top->data, nlevels - 1, agf, root, isroot);
+       pop_cur();
+}
+
+/*
+ * scan_lbtree() callback for one block of an inode's bmap btree.
+ *
+ * ablock:  block data, interpreted as xfs_bmbt_block_t.
+ * level:   level the caller expects this block to be at (0 = leaf).
+ * type:    block-map type recorded for this block and passed on to
+ *          process_bmbt_reclist() / the recursive scan.
+ * bno:     filesystem block number of this block.
+ * id:      inode the tree belongs to.
+ * totd:    passed to process_bmbt_reclist().
+ * toti:    incremented once for this block.
+ * nex:     incremented by the record count of each leaf block.
+ * blkmapp: passed to process_bmbt_reclist().
+ * isroot:  non-zero disables the minimum-record-count check.
+ * btype:   passed on to scan_lbtree() for child blocks.
+ *
+ * A bad magic number or level is counted in error but scanning continues;
+ * an out-of-range record count is counted and ends the scan of this block.
+ */
+static void
+scanfunc_bmap(
+       xfs_btree_lblock_t      *ablock,
+       int                     level,
+       dbm_t                   type,
+       xfs_fsblock_t           bno,
+       inodata_t               *id,
+       xfs_drfsbno_t           *totd,
+       xfs_drfsbno_t           *toti,
+       xfs_extnum_t            *nex,
+       blkmap_t                **blkmapp,
+       int                     isroot,
+       typnm_t                 btype)
+{
+       xfs_agblock_t           agbno;
+       xfs_agnumber_t          agno;
+       xfs_bmbt_block_t        *block = (xfs_bmbt_block_t *)ablock;
+       int                     i;
+       int                     numrecs;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmbt_rec_32_t       *rp;
+
+       agno = XFS_FSB_TO_AGNO(mp, bno);
+       agbno = XFS_FSB_TO_AGBNO(mp, bno);
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad magic # %#x in inode %lld bmbt block "
+                                "%u/%u\n",
+                               INT_GET(block->bb_magic, ARCH_CONVERT), id->ino, agno, agbno);
+               error++;
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("expected level %d got %d in inode %lld bmbt "
+                                "block %u/%u\n",
+                               level, INT_GET(block->bb_level, ARCH_CONVERT), id->ino, agno, agbno);
+               error++;
+       }
+       set_dbmap(agno, agbno, 1, type, agno, agbno);
+       set_inomap(agno, agbno, 1, id);
+       (*toti)++;
+       numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+       if (level == 0) {
+               /* leaf block: limits are the [0] entries */
+               if (numrecs > mp->m_bmap_dmxr[0] ||
+                   (isroot == 0 && numrecs < mp->m_bmap_dmnr[0])) {
+                       if (!sflag || id->ilist || CHECK_BLIST(bno))
+                               dbprintf("bad btree nrecs (%u, min=%u, max=%u) "
+                                        "in inode %lld bmap block %lld\n",
+                                       numrecs, mp->m_bmap_dmnr[0],
+                                       mp->m_bmap_dmxr[0], id->ino,
+                                       (xfs_dfsbno_t)bno);
+                       error++;
+                       return;
+               }
+               rp = (xfs_bmbt_rec_32_t *)
+                       XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                       block, 1, mp->m_bmap_dmxr[0]);
+               *nex += numrecs;
+               process_bmbt_reclist(rp, numrecs, type, id, totd, blkmapp);
+               return;
+       }
+       /* node block: limits are the [1] entries */
+       if (numrecs > mp->m_bmap_dmxr[1] ||
+           (isroot == 0 && numrecs < mp->m_bmap_dmnr[1])) {
+               if (!sflag || id->ilist || CHECK_BLIST(bno))
+                       dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+                                "inode %lld bmap block %lld\n",
+                               numrecs, mp->m_bmap_dmnr[1],
+                               mp->m_bmap_dmxr[1], id->ino, (xfs_dfsbno_t)bno);
+               error++;
+               return;
+       }
+       /*
+        * Locate the pointer array with the node-level maximum, matching the
+        * bounds check above and the other scanfunc_* routines in this file.
+        */
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+               mp->m_bmap_dmxr[1]);
+       for (i = 0; i < numrecs; i++)
+               scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, id, totd, toti,
+                       nex, blkmapp, 0, btype);
+}
+
+/*
+ * scan_sbtree() callback for one block of the by-block-number free space
+ * btree.
+ *
+ * ablock: block data, interpreted as xfs_alloc_block_t.
+ * level:  level the caller expects this block to be at (0 = leaf).
+ * agf:    AGF header of the allocation group being scanned.
+ * bno:    AG block number of this block.
+ * isroot: non-zero disables the minimum-record-count check.
+ *
+ * The block is counted in fdblocks and marked DBM_BTBNO.  Leaf records are
+ * entered in the block map as DBM_FREE1; node pointers are followed through
+ * scan_sbtree().  A bad magic number or record count is a serious error and
+ * stops the scan of this block.
+ */
+static void
+scanfunc_bno(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agf_t               *agf,
+       xfs_agblock_t           bno,
+       int                     isroot)
+{
+       xfs_alloc_block_t       *blk = (xfs_alloc_block_t *)ablock;
+       xfs_agnumber_t          agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       /* index into the per-level limit arrays: 0 for leaves, 1 for nodes */
+       int                     lidx = (level == 0) ? 0 : 1;
+       int                     nrecs;
+       int                     n;
+       xfs_alloc_ptr_t         *ptrs;
+       xfs_alloc_rec_t         *recs;
+
+       if (INT_GET(blk->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) {
+               dbprintf("bad magic # %#x in btbno block %u/%u\n",
+                       INT_GET(blk->bb_magic, ARCH_CONVERT), agno, bno);
+               serious_error++;
+               return;
+       }
+       fdblocks++;
+       if (INT_GET(blk->bb_level, ARCH_CONVERT) != level) {
+               if (!sflag)
+                       dbprintf("expected level %d got %d in btbno block "
+                                "%u/%u\n",
+                               level, INT_GET(blk->bb_level, ARCH_CONVERT),
+                               agno, bno);
+               error++;
+       }
+       set_dbmap(agno, bno, 1, DBM_BTBNO, agno, bno);
+
+       nrecs = INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+       if (nrecs > mp->m_alloc_mxr[lidx] ||
+           (!isroot && nrecs < mp->m_alloc_mnr[lidx])) {
+               dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+                        "btbno block %u/%u\n",
+                       nrecs, mp->m_alloc_mnr[lidx], mp->m_alloc_mxr[lidx],
+                       agno, bno);
+               serious_error++;
+               return;
+       }
+
+       if (level != 0) {
+               /* interior block: descend into every child */
+               ptrs = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc,
+                       blk, 1, mp->m_alloc_mxr[1]);
+               for (n = 0; n < nrecs; n++)
+                       scan_sbtree(agf, INT_GET(ptrs[n], ARCH_CONVERT), level,
+                               0, scanfunc_bno, TYP_BNOBT);
+               return;
+       }
+
+       /* leaf block: record each free extent */
+       recs = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, blk,
+               1, mp->m_alloc_mxr[0]);
+       for (n = 0; n < nrecs; n++)
+               set_dbmap(agno, INT_GET(recs[n].ar_startblock, ARCH_CONVERT),
+                       INT_GET(recs[n].ar_blockcount, ARCH_CONVERT), DBM_FREE1,
+                       agno, bno);
+}
+
+/*
+ * scan_sbtree() callback for one block of the by-size free space btree.
+ *
+ * ablock: block data, interpreted as xfs_alloc_block_t.
+ * level:  level the caller expects this block to be at (0 = leaf).
+ * agf:    AGF header of the allocation group being scanned.
+ * bno:    AG block number of this block.
+ * isroot: non-zero disables the minimum-record-count check.
+ *
+ * The block is counted in fdblocks and marked DBM_BTCNT.  For each leaf
+ * record the extent is moved from DBM_FREE1 to DBM_FREE2 via
+ * check_set_dbmap(), its length is added to fdblocks and agffreeblks, and
+ * agflongest tracks the largest length seen.  Node pointers are followed
+ * through scan_sbtree().  A bad magic number or record count is a serious
+ * error and stops the scan of this block.
+ */
+static void
+scanfunc_cnt(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agf_t               *agf,
+       xfs_agblock_t           bno,
+       int                     isroot)
+{
+       xfs_alloc_block_t       *block = (xfs_alloc_block_t *)ablock;
+       xfs_agnumber_t          seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       int                     i;
+       xfs_alloc_ptr_t         *pp;
+       xfs_alloc_rec_t         *rp;
+
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) {
+               dbprintf("bad magic # %#x in btcnt block %u/%u\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+               serious_error++;
+               return;
+       }
+       fdblocks++;
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               if (!sflag)
+                       dbprintf("expected level %d got %d in btcnt block "
+                                "%u/%u\n",
+                               level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+               error++;
+       }
+       set_dbmap(seqno, bno, 1, DBM_BTCNT, seqno, bno);
+       if (level == 0) {
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0] ||
+                   (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0]))  {
+                       /* name this tree (btcnt), not the by-bno tree */
+                       dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+                                "btcnt block %u/%u\n",
+                               INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[0],
+                               mp->m_alloc_mxr[0], seqno, bno);
+                       serious_error++;
+                       return;
+               }
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+                       1, mp->m_alloc_mxr[0]);
+               for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+                       check_set_dbmap(seqno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+                               INT_GET(rp[i].ar_blockcount, ARCH_CONVERT), DBM_FREE1, DBM_FREE2,
+                               seqno, bno);
+                       fdblocks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+                       agffreeblks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+                       if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > agflongest)
+                               agflongest = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+               }
+               return;
+       }
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1] ||
+           (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1]))  {
+               /* name this tree (btcnt), not the by-bno tree */
+               dbprintf("bad btree nrecs (%u, min=%u, max=%u) in btcnt block "
+                        "%u/%u\n",
+                       INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[1],
+                       mp->m_alloc_mxr[1], seqno, bno);
+               serious_error++;
+               return;
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+               mp->m_alloc_mxr[1]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+               scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_cnt, TYP_CNTBT);
+}
+
+/*
+ * scan_sbtree() callback for one block of the inode allocation btree.
+ *
+ * ablock: block data, interpreted as xfs_inobt_block_t.
+ * level:  level the caller expects this block to be at (0 = leaf).
+ * agf:    AGF header of the allocation group being scanned.
+ * bno:    AG block number of this block.
+ * isroot: non-zero disables the minimum-record-count check.
+ *
+ * The block is marked DBM_BTINO.  For each leaf record the inode chunk is
+ * marked DBM_INODE (when it starts on a block boundary), the global and
+ * per-AG inode counters are updated, the chunk is read, every inode in it
+ * is passed to process_inode(), and the number of inodes flagged free in the
+ * record is compared with ir_freecount.  Node pointers are followed through
+ * scan_sbtree().  A bad magic number or record count is a serious error and
+ * stops the scan of this block.
+ */
+static void
+scanfunc_ino(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agf_t               *agf,
+       xfs_agblock_t           bno,
+       int                     isroot)
+{
+       xfs_agino_t             agino;
+       xfs_inobt_block_t       *block = (xfs_inobt_block_t *)ablock;
+       xfs_agnumber_t          seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       int                     i;
+       int                     isfree;
+       int                     j;
+       int                     nfree;
+       int                     off;
+       xfs_inobt_ptr_t         *pp;
+       xfs_inobt_rec_t         *rp;
+
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) {
+               dbprintf("bad magic # %#x in inobt block %u/%u\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+               serious_error++;
+               return;
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               if (!sflag)
+                       dbprintf("expected level %d got %d in inobt block "
+                                "%u/%u\n",
+                               level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+               error++;
+       }
+       set_dbmap(seqno, bno, 1, DBM_BTINO, seqno, bno);
+       if (level == 0) {
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0] ||
+                   (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0])) {
+                       dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+                                "inobt block %u/%u\n",
+                               INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[0],
+                               mp->m_inobt_mxr[0], seqno, bno);
+                       serious_error++;
+                       return;
+               }
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+                       1, mp->m_inobt_mxr[0]);
+               for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+                       agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+                       off = XFS_INO_TO_OFFSET(mp, agino);
+                       if (off == 0) {
+                               /*
+                                * A chunk that violates sb_inoalignmt clears
+                                * the align bit in the working sbversion.
+                                */
+                               if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+                                   mp->m_sb.sb_inoalignmt &&
+                                   (XFS_INO_TO_AGBNO(mp, agino) %
+                                    mp->m_sb.sb_inoalignmt))
+                                       sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
+                               set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
+                                       (xfs_extlen_t)MAX(1,
+                                               XFS_INODES_PER_CHUNK >>
+                                               mp->m_sb.sb_inopblog),
+                                       DBM_INODE, seqno, bno);
+                       }
+                       icount += XFS_INODES_PER_CHUNK;
+                       agicount += XFS_INODES_PER_CHUNK;
+                       ifree += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+                       agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+                       push_cur();
+                       set_cur(&typtab[TYP_INODE],
+                               XFS_AGB_TO_DADDR(mp, seqno,
+                                                XFS_AGINO_TO_AGBNO(mp, agino)),
+                               (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+                               DB_RING_IGN, NULL);
+                       if (iocur_top->data == NULL) {
+                               if (!sflag)
+                                       dbprintf("can't read inode block "
+                                                "%u/%u\n",
+                                               seqno,
+                                               XFS_AGINO_TO_AGBNO(mp, agino));
+                               error++;
+                               /*
+                                * Balance the push_cur() above; otherwise
+                                * each unreadable chunk leaves one extra
+                                * entry on the cursor stack.
+                                */
+                               pop_cur();
+                               continue;
+                       }
+                       for (j = 0, nfree = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                               if ((isfree = XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)))
+                                       nfree++;
+                               process_inode(agf, agino + j,
+                                       (xfs_dinode_t *)((char *)iocur_top->data + ((off + j) << mp->m_sb.sb_inodelog)),
+                                               isfree);
+                       }
+                       if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) {
+                               if (!sflag)
+                                       dbprintf("ir_freecount/free mismatch, "
+                                                "inode chunk %u/%u, freecount "
+                                                "%d nfree %d\n",
+                                               seqno, agino,
+                                               INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree);
+                               error++;
+                       }
+                       pop_cur();
+               }
+               return;
+       }
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1] ||
+           (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1])) {
+               dbprintf("bad btree nrecs (%u, min=%u, max=%u) in inobt block "
+                        "%u/%u\n",
+                       INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[1],
+                       mp->m_inobt_mxr[1], seqno, bno);
+               serious_error++;
+               return;
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+               mp->m_inobt_mxr[1]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+               scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_ino, TYP_INOBT);
+}
+
+/*
+ * Convenience wrapper around check_set_dbmap() that always supplies
+ * DBM_UNKNOWN as its fourth argument; every other argument is forwarded
+ * unchanged.
+ */
+static void
+set_dbmap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       dbm_t           type,
+       xfs_agnumber_t  c_agno,
+       xfs_agblock_t   c_agbno)
+{
+       check_set_dbmap(agno, agbno, len,
+               DBM_UNKNOWN, type,
+               c_agno, c_agbno);
+}
+
+/*
+ * Assign an inode as owner of len blocks starting at agno/agbno in inomap.
+ *
+ * Nothing is recorded when check_inomap() returns zero for the range.
+ * Each assignment is reported when verbose is set, the inode is on the
+ * inode list (id->ilist), or CHECK_BLISTA() matches the block.
+ */
+static void
+set_inomap(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len,
+       inodata_t       *id)
+{
+       inodata_t       **slot;
+       xfs_extlen_t    n;
+       int             chatty;
+
+       if (!check_inomap(agno, agbno, len, id->ino))
+               return;
+       /* cheap pre-test so the per-block check is skipped when silent */
+       chatty = verbose | id->ilist | blist_size;
+       slot = &inomap[agno][agbno];
+       for (n = 0; n < len; n++) {
+               slot[n] = id;
+               if (!chatty)
+                       continue;
+               if (verbose || id->ilist || CHECK_BLISTA(agno, agbno + n))
+                       dbprintf("setting inode to %lld for block %u/%u\n",
+                               id->ino, agno, agbno + n);
+       }
+}
+
+/*
+ * Convenience wrapper around check_set_rdbmap() that always supplies
+ * DBM_UNKNOWN as its third argument; bno, len and type are forwarded
+ * unchanged.
+ */
+static void
+set_rdbmap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       dbm_t           type)
+{
+       check_set_rdbmap(bno, len,
+               DBM_UNKNOWN, type);
+}
+
+/*
+ * Assign an inode as owner of len realtime blocks starting at bno.
+ *
+ * The entries live in the inomap row indexed by sb_agcount.  Nothing is
+ * recorded when check_rinomap() returns zero for the range.  Each
+ * assignment is reported when verbose is set, the inode is on the inode
+ * list (id->ilist), or CHECK_BLIST() matches the block.
+ */
+static void
+set_rinomap(
+       xfs_drfsbno_t   bno,
+       xfs_extlen_t    len,
+       inodata_t       *id)
+{
+       inodata_t       **slot;
+       xfs_extlen_t    n;
+       int             chatty;
+
+       if (!check_rinomap(bno, len, id->ino))
+               return;
+       /* cheap pre-test so the per-block check is skipped when silent */
+       chatty = verbose | id->ilist | blist_size;
+       slot = &inomap[mp->m_sb.sb_agcount][bno];
+       for (n = 0; n < len; n++) {
+               slot[n] = id;
+               if (chatty && (verbose || id->ilist || CHECK_BLIST(bno + n)))
+                       dbprintf("setting inode to %lld for rtblock %llu\n",
+                               id->ino, bno + n);
+       }
+}
+
+/*
+ * Store the link count, directory flag and security flag in an inode's
+ * inodata record, and report the link count when verbose is set or the
+ * inode is on the inode list (id->ilist).
+ */
+static void
+setlink_inode(
+       inodata_t       *id,
+       nlink_t         nlink,
+       int             isdir,
+       int             security)
+{
+       const char      *dirword = isdir ? "is" : "not";
+
+       id->security = security;
+       id->isdir = isdir;
+       id->link_set = nlink;
+       if (!verbose && !id->ilist)
+               return;
+       dbprintf("inode %lld nlink %u %s dir\n", id->ino, nlink, dirword);
+}
diff --git a/db/check.h b/db/check.h
new file mode 100644 (file)
index 0000000..71d5576
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void    check_init(void);
diff --git a/db/cntbt.c b/db/cntbt.c
new file mode 100644 (file)
index 0000000..5e4a6da
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "cntbt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int     cntbt_key_count(void *obj, int startoff);
+static int     cntbt_key_offset(void *obj, int startoff, int idx);
+static int     cntbt_ptr_count(void *obj, int startoff);
+static int     cntbt_ptr_offset(void *obj, int startoff, int idx);
+static int     cntbt_rec_count(void *obj, int startoff);
+static int     cntbt_rec_offset(void *obj, int startoff, int idx);
+
+/* Top-level descriptor: one unnamed FLDT_CNTBT field at offset 0. */
+const field_t  cntbt_hfld[] = {
+       { "", FLDT_CNTBT, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/* OFF(f): bitize()'d byte offset of member bb_<f> in xfs_alloc_block_t. */
+#define        OFF(f)  bitize(offsetof(xfs_alloc_block_t, bb_ ## f))
+/*
+ * Fields of a by-size free space btree block.  The header members are at
+ * fixed offsets; recs, keys and ptrs are arrays whose element offset and
+ * count come from the cntbt_*_offset / cntbt_*_count callbacks below
+ * (recs are counted only at level 0, keys and ptrs only above level 0).
+ */
+const field_t  cntbt_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_CNTBT },
+       { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_CNTBT },
+       { "recs", FLDT_CNTBTREC, cntbt_rec_offset, cntbt_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_CNTBTKEY, cntbt_key_offset, cntbt_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_CNTBTPTR, cntbt_ptr_offset, cntbt_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_CNTBT },
+       { NULL }
+};
+
+/* KOFF(f): bitize()'d byte offset of member ar_<f> in xfs_alloc_key_t. */
+#define        KOFF(f) bitize(offsetof(xfs_alloc_key_t, ar_ ## f))
+/* Key fields; listed blockcount first, then startblock. */
+const field_t  cntbt_key_flds[] = {
+       { "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE },
+       { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+       { NULL }
+};
+
+/* ROFF(f): bitize()'d byte offset of member ar_<f> in xfs_alloc_rec_t. */
+#define        ROFF(f) bitize(offsetof(xfs_alloc_rec_t, ar_ ## f))
+/* Leaf record fields; listed startblock first, then blockcount. */
+const field_t  cntbt_rec_flds[] = {
+       { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+       { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Count callback for the "keys" array: 0 for a level-0 block, otherwise
+ * the block's bb_numrecs.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *blk = obj;
+       int                     is_leaf;
+
+       ASSERT(startoff == 0);
+       is_leaf = (INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+       return is_leaf ? 0 : INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for the "keys" array: bitize()'d distance from the start
+ * of the block to the key that XFS_BTREE_KEY_ADDR() yields for idx.
+ * startoff must be 0 and the block must be above level 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *blk = obj;
+       char                    *key_addr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+       key_addr = (char *)XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
+               xfs_alloc, blk, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       return bitize((int)(key_addr - (char *)blk));
+}
+
+/*
+ * Count callback for the "ptrs" array: 0 for a level-0 block, otherwise
+ * the block's bb_numrecs.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *blk = obj;
+       int                     is_leaf;
+
+       ASSERT(startoff == 0);
+       is_leaf = (INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+       return is_leaf ? 0 : INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for the "ptrs" array: bitize()'d distance from the start
+ * of the block to the pointer that XFS_BTREE_PTR_ADDR() yields for idx.
+ * startoff must be 0 and the block must be above level 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *blk = obj;
+       char                    *ptr_addr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+       ptr_addr = (char *)XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
+               xfs_alloc, blk, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       return bitize((int)(ptr_addr - (char *)blk));
+}
+
+/*
+ * Count callback for the "recs" array: 0 for a block above level 0,
+ * otherwise the block's bb_numrecs.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_rec_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_alloc_block_t       *blk = obj;
+       int                     is_node;
+
+       ASSERT(startoff == 0);
+       is_node = (INT_GET(blk->bb_level, ARCH_CONVERT) > 0);
+       return is_node ? 0 : INT_GET(blk->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for the "recs" array: bitize()'d distance from the start
+ * of the block to the record that XFS_BTREE_REC_ADDR() yields for idx.
+ * startoff must be 0 and the block must be at level 0.
+ */
+/*ARGSUSED*/
+static int
+cntbt_rec_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_alloc_block_t       *blk = obj;
+       char                    *rec_addr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->bb_level, ARCH_CONVERT) == 0);
+       rec_addr = (char *)XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
+               xfs_alloc, blk, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+       return bitize((int)(rec_addr - (char *)blk));
+}
+
+/*
+ * Size callback for a cntbt block: the bitize()'d filesystem block size
+ * from the superblock.  None of the arguments are consulted.
+ */
+/*ARGSUSED*/
+int
+cntbt_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return (bitize(mp->m_sb.sb_blocksize));
+}
diff --git a/db/cntbt.h b/db/cntbt.h
new file mode 100644 (file)
index 0000000..7681503
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+extern const struct field      cntbt_flds[];
+extern const struct field      cntbt_hfld[];
+extern const struct field      cntbt_key_flds[];
+extern const struct field      cntbt_rec_flds[];
+
+extern int     cntbt_size(void *obj, int startoff, int idx);
diff --git a/db/command.c b/db/command.c
new file mode 100644 (file)
index 0000000..5c8153a
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "addr.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "block.h"
+#include "bmap.h"
+#include "check.h"
+#include "command.h"
+#include "convert.h"
+#include "debug.h"
+#include "type.h"
+#include "echo.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "frag.h"
+#include "freesp.h"
+#include "help.h"
+#include "hash.h"
+#include "inode.h"
+#include "input.h"
+#include "io.h"
+#include "output.h"
+#include "print.h"
+#include "quit.h"
+#include "sb.h"
+#include "uuid.h"
+#include "write.h"
+#include "malloc.h"
+#include "dquot.h"
+
+/* Table of registered commands; grown and re-sorted by add_command(). */
+cmdinfo_t      *cmdtab;
+/* Number of entries currently held in cmdtab. */
+int            ncmds;
+
+/* qsort() callback used by add_command(). */
+static int     cmd_compare(const void *a, const void *b);
+
+/*
+ * qsort() comparison callback: orders two cmdinfo_t entries by the
+ * strcmp() result of their name members.
+ */
+static int
+cmd_compare(const void *a, const void *b)
+{
+       const cmdinfo_t *lhs = a;
+       const cmdinfo_t *rhs = b;
+
+       return strcmp(lhs->name, rhs->name);
+}
+
+/*
+ * Register a command: grow the global table by one entry, store a copy of
+ * *ci in the new last slot, then re-sort the whole table by command name.
+ */
+void
+add_command(
+       const cmdinfo_t *ci)
+{
+       int             slot = ncmds;
+
+       /* The count is bumped before the table is resized, as before. */
+       ncmds = slot + 1;
+       cmdtab = xrealloc((void *)cmdtab, ncmds * sizeof(*cmdtab));
+       cmdtab[slot] = *ci;
+       qsort(cmdtab, ncmds, sizeof(*cmdtab), cmd_compare);
+}
+
+/*
+ * Look up argv[0] in the command table, validate the number of arguments
+ * (argc - 1) against the entry's argmin/argmax, and run the handler.
+ *
+ * Returns the handler's return value, or 0 when the command is unknown or
+ * the argument count is out of range (a message is printed in both cases).
+ * An argmax of -1 means "no upper bound".
+ */
+int
+command(
+       int             argc,
+       char            **argv)
+{
+       const cmdinfo_t *entry;
+       char            *name = argv[0];
+       int             nargs = argc - 1;
+       int             in_range;
+
+       entry = find_command(name);
+       if (!entry) {
+               dbprintf("command %s not found\n", name);
+               return 0;
+       }
+
+       in_range = nargs >= entry->argmin &&
+                  (entry->argmax == -1 || nargs <= entry->argmax);
+       if (in_range) {
+               /*
+                * optind is cleared before every dispatch; presumably so
+                * that a handler calling getopt() starts from scratch.
+                */
+               optind = 0;
+               return entry->cfunc(argc, argv);
+       }
+
+       dbprintf("bad argument count %d to %s, expected ", nargs, name);
+       if (entry->argmax == -1)
+               dbprintf("at least %d", entry->argmin);
+       else if (entry->argmin == entry->argmax)
+               dbprintf("%d", entry->argmin);
+       else
+               dbprintf("between %d and %d", entry->argmin, entry->argmax);
+       dbprintf(" arguments\n");
+       return 0;
+}
+
+/*
+ * Linear search of the command table for an entry whose name, or whose
+ * (optional) altname, equals cmd.  Returns the first match in table order,
+ * or NULL when nothing matches.
+ */
+const cmdinfo_t *
+find_command(
+       const char      *cmd)
+{
+       int             i;
+
+       for (i = 0; i < ncmds; i++) {
+               const cmdinfo_t *entry = &cmdtab[i];
+
+               if (strcmp(entry->name, cmd) == 0)
+                       return entry;
+               if (entry->altname != NULL && strcmp(entry->altname, cmd) == 0)
+                       return entry;
+       }
+       return NULL;
+}
+
+/*
+ * Run every module's *_init() routine, in the order listed.
+ *
+ * The two initialisers visible alongside this file (convert_init and
+ * debug_init) each register one command with add_command(); the others
+ * presumably do the same -- confirm in the respective modules.
+ */
+void
+init_commands(void)
+{
+       addr_init();
+       agf_init();
+       agfl_init();
+       agi_init();
+       block_init();
+       bmap_init();
+       check_init();
+       convert_init();
+       debug_init();
+       echo_init();
+       frag_init();
+       freesp_init();
+       help_init();
+       hash_init();
+       inode_init();
+       input_init();
+       io_init();
+       output_init();
+       print_init();
+       quit_init();
+       sb_init();
+       uuid_init();
+       type_init();
+       write_init();
+       dquot_init();
+}
diff --git a/db/command.h b/db/command.h
new file mode 100644 (file)
index 0000000..dd35ed6
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Command handler: receives the full argument vector, argv[0] included. */
+typedef int (*cfunc_t)(int argc, char **argv);
+/* Help callback; takes no arguments and returns nothing. */
+typedef void (*helpfunc_t)(void);
+
+/*
+ * Description of one command, as stored in cmdtab.
+ */
+typedef struct cmdinfo
+{
+       const char      *name;          /* primary name; sort and lookup key */
+       const char      *altname;       /* alternate name, may be NULL */
+       cfunc_t         cfunc;          /* handler invoked by command() */
+       int             argmin;         /* minimum value of argc - 1 */
+       int             argmax;         /* maximum of argc - 1; -1 = no limit */
+       int             canpush;        /* NOTE(review): not used in command.c; meaning not shown here */
+       const char      *args;          /* argument synopsis text */
+       const char      *oneline;       /* one-line description text */
+       helpfunc_t      help;           /* help callback, may be NULL */
+} cmdinfo_t;
+
+/* Command table and its entry count (defined in command.c). */
+extern cmdinfo_t       *cmdtab;
+extern int             ncmds;
+
+/* Copy *ci into the table and re-sort it by name. */
+extern void            add_command(const cmdinfo_t *ci);
+/* Look up argv[0], check the argument count and run the handler. */
+extern int             command(int argc, char **argv);
+/* Find an entry by name or altname; NULL when there is no match. */
+extern const cmdinfo_t *find_command(const char *cmd);
+/* Call every module's *_init() routine. */
+extern void            init_commands(void);
diff --git a/db/convert.c b/db/convert.c
new file mode 100644 (file)
index 0000000..02a4d24
--- /dev/null
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "convert.h"
+#include "output.h"
+#include "mount.h"
+
+/*
+ * M(A) is the mask bit for conversion type CT_A.
+ *
+ * The *_to_bytes() macros turn a value of the named kind into a 64-bit
+ * byte count.  All except daddr_to_bytes() read the superblock geometry
+ * through the global mp.  agino_to_bytes() and inoidx_to_bytes() expand to
+ * the same shift by sb_inodelog.
+ */
+#define        M(A)    (1 << CT_ ## A)
+#define        agblock_to_bytes(x)     \
+       ((__uint64_t)(x) << mp->m_sb.sb_blocklog)
+#define        agino_to_bytes(x)       \
+       ((__uint64_t)(x) << mp->m_sb.sb_inodelog)
+#define        agnumber_to_bytes(x)    \
+       agblock_to_bytes((__uint64_t)(x) * mp->m_sb.sb_agblocks)
+#define        daddr_to_bytes(x)       \
+       ((__uint64_t)(x) << BBSHIFT)
+#define        fsblock_to_bytes(x)     \
+       (agnumber_to_bytes(XFS_FSB_TO_AGNO(mp, (x))) + \
+        agblock_to_bytes(XFS_FSB_TO_AGBNO(mp, (x))))
+#define        ino_to_bytes(x)         \
+       (agnumber_to_bytes(XFS_INO_TO_AGNO(mp, (x))) + \
+        agino_to_bytes(XFS_INO_TO_AGINO(mp, (x))))
+#define        inoidx_to_bytes(x)      \
+       ((__uint64_t)(x) << mp->m_sb.sb_inodelog)
+
+/*
+ * Kinds of address the convert command understands.  The non-negative
+ * values are used as indexes into ctydescs[] and cvals[] and as bit
+ * positions (1 << c) in the masks built by convert_f().  CT_NONE is what
+ * lookupcty() returns for an unknown name; NCTS is the number of real
+ * kinds.
+ */
+typedef enum {
+       CT_NONE = -1, 
+       CT_AGBLOCK,             /* xfs_agblock_t */
+       CT_AGINO,               /* xfs_agino_t */
+       CT_AGNUMBER,            /* xfs_agno_t */
+       CT_BBOFF,               /* byte offset in daddr */
+       CT_BLKOFF,              /* byte offset in fsb/agb */
+       CT_BYTE,                /* byte in filesystem */
+       CT_DADDR,               /* daddr_t */
+       CT_FSBLOCK,             /* xfs_fsblock_t */
+       CT_INO,                 /* xfs_ino_t */
+       CT_INOIDX,              /* index of inode in fsblock */
+       CT_INOOFF,              /* byte offset in inode */
+       NCTS
+} ctype_t;
+
+/* Static description of one conversion type. */
+typedef struct ctydesc {
+       ctype_t         ctype;          /* the type being described */
+       int             allowed;        /* M() mask of types it may be combined with */
+       const char      **names;        /* NULL-terminated list of accepted names */
+} ctydesc_t;
+
+/*
+ * One parsed input value.  The member that is valid is chosen by the
+ * ctype_t stored alongside it: getvalue() writes it and bytevalue() reads
+ * it back using the same type.
+ */
+typedef union {
+       xfs_agblock_t   agblock;
+       xfs_agino_t     agino;
+       xfs_agnumber_t  agnumber;
+       int             bboff;
+       int             blkoff;
+       __uint64_t      byte;
+       xfs_daddr_t     daddr;
+       xfs_fsblock_t   fsblock;
+       xfs_ino_t       ino;
+       int             inoidx;
+       int             inooff;
+} cval_t;
+
+/* File-local helpers, defined further down in this file. */
+static __uint64_t              bytevalue(ctype_t ctype, cval_t *val);
+static int             convert_f(int argc, char **argv);
+static int             getvalue(char *s, ctype_t ctype, cval_t *val);
+static ctype_t         lookupcty(char *ctyname);
+
+/*
+ * Accepted spellings for each conversion type.  Every list is terminated
+ * by NULL, which is what lookupcty() relies on to stop scanning.
+ */
+static const char      *agblock_names[] = { "agblock", "agbno", NULL };
+static const char      *agino_names[] = { "agino", "aginode", NULL };
+static const char      *agnumber_names[] = { "agnumber", "agno", NULL };
+static const char      *bboff_names[] = { "bboff", "daddroff", NULL };
+static const char      *blkoff_names[] = { "blkoff", "fsboff", "agboff",
+                                           NULL };
+static const char      *byte_names[] = { "byte", "fsbyte", NULL };
+static const char      *daddr_names[] = { "daddr", "bb", NULL };
+static const char      *fsblock_names[] = { "fsblock", "fsb", "fsbno", NULL };
+static const char      *ino_names[] = { "ino", "inode", NULL };
+static const char      *inoidx_names[] = { "inoidx", "offset", NULL };
+static const char      *inooff_names[] = { "inooff", "inodeoff", NULL };
+
+/*
+ * Per-type descriptors.  lookupcty() and convert_f() index this array
+ * directly with a ctype_t, so the entries must stay in enum order.
+ *
+ * The second member lists the types that may accompany this one on a
+ * convert command line; convert_f() ORs together the complement of each
+ * supplied type's mask and rejects any later type whose bit is set there.
+ * No type lists itself, so repeating a type is also rejected.
+ */
+static const ctydesc_t ctydescs[NCTS] = {
+       { CT_AGBLOCK, M(AGNUMBER)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF),
+         agblock_names },
+       { CT_AGINO, M(AGNUMBER)|M(INOOFF), agino_names },
+       { CT_AGNUMBER,
+         M(AGBLOCK)|M(AGINO)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF),
+         agnumber_names },
+       { CT_BBOFF, M(AGBLOCK)|M(AGNUMBER)|M(DADDR)|M(FSBLOCK), bboff_names },
+       { CT_BLKOFF, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK), blkoff_names },
+       { CT_BYTE, 0, byte_names },
+       { CT_DADDR, M(BBOFF), daddr_names },
+       { CT_FSBLOCK, M(BBOFF)|M(BLKOFF)|M(INOIDX), fsblock_names },
+       { CT_INO, M(INOOFF), ino_names },
+       { CT_INOIDX, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK)|M(INOOFF),
+         inoidx_names },
+       { CT_INOOFF,
+         M(AGBLOCK)|M(AGINO)|M(AGNUMBER)|M(FSBLOCK)|M(INO)|M(INOIDX),
+         inooff_names },
+};
+
+/*
+ * Command table entry for "convert": no alternate name, between 3 and 9
+ * arguments, no help callback.
+ */
+static const cmdinfo_t convert_cmd =
+       { "convert", NULL, convert_f, 3, 9, 0, "type num [type num]... type",
+         "convert from one address form to another", NULL };
+
+/*
+ * Return the byte count that the value *val, interpreted as type ctype,
+ * contributes to a filesystem byte address.  The member of *val that is
+ * read is the one matching ctype.
+ *
+ * CT_NONE and NCTS are not real value types; they yield 0.
+ */
+static __uint64_t
+bytevalue(ctype_t ctype, cval_t *val)
+{
+       switch (ctype) {
+       case CT_AGBLOCK:
+               return agblock_to_bytes(val->agblock);
+       case CT_AGINO:
+               return agino_to_bytes(val->agino);
+       case CT_AGNUMBER:
+               return agnumber_to_bytes(val->agnumber);
+       case CT_BBOFF:
+               return (__uint64_t)val->bboff;
+       case CT_BLKOFF:
+               return (__uint64_t)val->blkoff;
+       case CT_BYTE:
+               return val->byte;
+       case CT_DADDR:
+               return daddr_to_bytes(val->daddr);
+       case CT_FSBLOCK:
+               return fsblock_to_bytes(val->fsblock);
+       case CT_INO:
+               return ino_to_bytes(val->ino);
+       case CT_INOIDX:
+               return inoidx_to_bytes(val->inoidx);
+       case CT_INOOFF:
+               return (__uint64_t)val->inooff;
+       case CT_NONE:
+       case NCTS:
+               /*
+                * A case label must be followed by a statement; a label
+                * directly before the closing brace is not valid C.
+                */
+               break;
+       }
+       /* Only reached for CT_NONE/NCTS, which callers never pass. */
+       return 0;
+}
+
+/*
+ * Handler for the "convert" command.
+ *
+ * After the command name, the arguments are one or more (type, number)
+ * pairs followed by a single result type.  Each pair is turned into a byte
+ * count, the byte counts are summed, and the sum is re-expressed in the
+ * result type and printed in hex and decimal.
+ *
+ * Always returns 0; errors are reported with dbprintf().
+ */
+static int
+convert_f(int argc, char **argv)
+{
+       ctype_t         c;
+       int             conmask;
+       cval_t          cvals[NCTS];
+       int             i;
+       int             mask;
+       __uint64_t      v;
+       ctype_t         wtype;
+
+       /* move past the "convert" command */
+       argc--;
+       argv++;
+
+       /* N pairs plus one result type is always an odd count. */
+       if ((argc % 2) != 1) {
+               dbprintf("bad argument count %d to convert, expected 3,5,7,9 "
+                        "arguments\n", argc);
+               return 0;
+       }
+       if ((wtype = lookupcty(argv[argc - 1])) == CT_NONE) {
+               dbprintf("unknown conversion type %s\n", argv[argc - 1]);
+               return 0;
+       }
+
+       /*
+        * mask collects the types supplied so far; conmask collects every
+        * type that may no longer be supplied alongside them.
+        */
+       for (i = mask = conmask = 0; i < (argc - 1) / 2; i++) {
+               c = lookupcty(argv[i * 2]);
+               if (c == CT_NONE) {
+                       dbprintf("unknown conversion type %s\n", argv[i * 2]);
+                       return 0;
+               }
+               if (c == wtype) {
+                       dbprintf("result type same as argument\n");
+                       return 0;
+               }
+               if (conmask & (1 << c)) {
+                       dbprintf("conflicting conversion type %s\n",
+                               argv[i * 2]);
+                       return 0;
+               }
+               if (!getvalue(argv[i * 2 + 1], c, &cvals[c]))
+                       return 0;
+               mask |= 1 << c;
+               conmask |= ~ctydescs[c].allowed;
+       }
+       /*
+        * If an AG is currently selected and an AG number would not conflict
+        * with what was given, add it as an implicit input.
+        */
+       if (cur_agno != NULLAGNUMBER && (conmask & M(AGNUMBER)) == 0) {
+               cvals[CT_AGNUMBER].agnumber = cur_agno;
+               mask |= M(AGNUMBER);
+               conmask |= ~ctydescs[CT_AGNUMBER].allowed;
+       }
+       /* Sum the byte contribution of every supplied value. */
+       v = 0;
+       for (c = (ctype_t)0; c < NCTS; c++) {
+               if (!(mask & (1 << c)))
+                       continue;
+               v += bytevalue(c, &cvals[c]);
+       }
+       /* Re-express the byte address in the requested result type. */
+       switch (wtype) {
+       case CT_AGBLOCK:
+               v = XFS_DADDR_TO_AGBNO(mp, v >> BBSHIFT);
+               break;
+       case CT_AGINO:
+               v = (v >> mp->m_sb.sb_inodelog) %
+                   (mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog);
+               break;
+       case CT_AGNUMBER:
+               v = XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT);
+               break;
+       case CT_BBOFF:
+               v &= BBMASK;
+               break;
+       case CT_BLKOFF:
+               v &= mp->m_blockmask;
+               break;
+       case CT_BYTE:
+               break;
+       case CT_DADDR:
+               v >>= BBSHIFT;
+               break;
+       case CT_FSBLOCK:
+               v = XFS_DADDR_TO_FSB(mp, v >> BBSHIFT);
+               break;
+       case CT_INO:
+               v = XFS_AGINO_TO_INO(mp, XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT),
+                       (v >> mp->m_sb.sb_inodelog) %
+                       (mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog));
+               break;
+       case CT_INOIDX:
+               v = (v >> mp->m_sb.sb_inodelog) & (mp->m_sb.sb_inopblock - 1);
+               break;
+       case CT_INOOFF:
+               v &= mp->m_sb.sb_inodesize - 1;
+               break;
+       case CT_NONE:
+       case NCTS:
+               /* NOTREACHED */
+               break;
+       }
+       /*
+        * __uint64_t is not unsigned long long on every platform, so cast
+        * explicitly to match the %llx / %llu conversions.
+        */
+       dbprintf("0x%llx (%llu)\n", (unsigned long long)v,
+                (unsigned long long)v);
+       return 0;
+}
+
+/* Register the "convert" command in the command table. */
+void
+convert_init(void)
+{
+       add_command(&convert_cmd);
+}
+
+/*
+ * Parse the string s as an unsigned number (base chosen by strtoull with
+ * base 0) and store it, cast to the matching type, in the member of *val
+ * that corresponds to ctype.
+ *
+ * Returns 1 on success.  Returns 0, after printing a message, when s has
+ * trailing characters that are not part of the number.
+ */
+static int
+getvalue(char *s, ctype_t ctype, cval_t *val)
+{
+       char            *p;
+       __uint64_t      v;
+
+       v = strtoull(s, &p, 0);
+       if (*p != '\0') {
+               dbprintf("%s is not a number\n", s);
+               return 0;
+       }
+       switch (ctype) {
+       case CT_AGBLOCK:
+               val->agblock = (xfs_agblock_t)v;
+               break;
+       case CT_AGINO:
+               val->agino = (xfs_agino_t)v;
+               break;
+       case CT_AGNUMBER:
+               val->agnumber = (xfs_agnumber_t)v;
+               break;
+       case CT_BBOFF:
+               val->bboff = (int)v;
+               break;
+       case CT_BLKOFF:
+               val->blkoff = (int)v;
+               break;
+       case CT_BYTE:
+               val->byte = (__uint64_t)v;
+               break;
+       case CT_DADDR:
+               val->daddr = (xfs_daddr_t)v;
+               break;
+       case CT_FSBLOCK:
+               val->fsblock = (xfs_fsblock_t)v;
+               break;
+       case CT_INO:
+               val->ino = (xfs_ino_t)v;
+               break;
+       case CT_INOIDX:
+               val->inoidx = (int)v;
+               break;
+       case CT_INOOFF:
+               val->inooff = (int)v;
+               break;
+       case CT_NONE:
+       case NCTS:
+               /*
+                * NOTREACHED.  The break is required: a case label directly
+                * before the closing brace is not valid C.
+                */
+               break;
+       }
+       return 1;
+}
+
+/*
+ * Map a type name given on the command line to its ctype_t by scanning
+ * the name list of every descriptor in ctydescs[], in order.  Returns
+ * CT_NONE when no list contains ctyname.
+ */
+static ctype_t
+lookupcty(char *ctyname)
+{
+       int             t;
+       int             n;
+
+       for (t = 0; t < NCTS; t++) {
+               const char      **names = ctydescs[t].names;
+
+               for (n = 0; names[n] != NULL; n++) {
+                       if (strcmp(ctyname, names[n]) == 0)
+                               return (ctype_t)t;
+               }
+       }
+       return CT_NONE;
+}
diff --git a/db/convert.h b/db/convert.h
new file mode 100644 (file)
index 0000000..0ddbca4
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Register the "convert" command; called from init_commands(). */
+extern void    convert_init(void);
diff --git a/db/data.c b/db/data.c
new file mode 100644 (file)
index 0000000..c53a5bb
--- /dev/null
+++ b/db/data.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+
+/*
+ * Global state shared between the db modules (declared in data.h).
+ */
+int                    blkbb;
+/* NULLAGNUMBER until set; convert_f() treats that value as "no AG selected". */
+xfs_agnumber_t         cur_agno = NULLAGNUMBER;
+int                    exitcode;
+int                     flag_expert_mode = 0;
+int                     flag_readonly = 0;
+libxfs_init_t          xfsargs;
diff --git a/db/data.h b/db/data.h
new file mode 100644 (file)
index 0000000..77e51ff
--- /dev/null
+++ b/db/data.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Declarations for the globals defined in data.c. */
+extern int             blkbb;
+extern xfs_agnumber_t  cur_agno;
+extern int             exitcode;
+extern int              flag_expert_mode;
+extern int              flag_readonly;
+/* NOTE(review): data.c has no definition of flag_arch -- confirm it is defined elsewhere. */
+extern int              flag_arch;
+extern libxfs_init_t   xfsargs;
diff --git a/db/dbread.c b/db/dbread.c
new file mode 100644 (file)
index 0000000..95032ba
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bmap.h"
+#include "data.h"
+#include "dbread.h"
+#include "io.h"
+#include "mount.h"
+
+/*
+ * Read nblocks filesystem blocks, starting at file offset bno (in blocks)
+ * of fork whichfork, into buf.  Ranges with no mapping are filled with
+ * zeroes.
+ *
+ * The file being read is whatever bmap() operates on; presumably the
+ * inode currently selected in the debugger -- confirm in bmap.c.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * read_bbs().
+ */
+int
+dbread(void *buf, int nblocks, xfs_fileoff_t bno, int whichfork)
+{
+       bmap_ext_t      bm;
+       char            *bp;
+       xfs_dfiloff_t   eb;
+       xfs_dfiloff_t   end;
+       int             i;
+       int             nex;
+
+       /* Ask bmap() for one extent at a time. */
+       nex = 1;
+       end = bno + nblocks;
+       bp = buf;
+       while (bno < end) {
+               bmap(bno, end - bno, whichfork, &nex, &bm);
+               if (nex == 0) {
+                       /*
+                        * Nothing mapped: fake an extent starting at end so
+                        * the rest of the range is handled as a hole below.
+                        */
+                       bm.startoff = end;
+                       bm.blockcount = 1;
+               }
+               if (bm.startoff > bno) {
+                       /* Hole before the extent: zero-fill up to it or to end. */
+                       eb = end < bm.startoff ? end : bm.startoff;
+                       i = (int)XFS_FSB_TO_B(mp, eb - bno);
+                       memset(bp, 0, i);
+                       bp += i;
+                       bno = eb;
+               }
+               if (bno == end)
+                       break;
+               if (bno > bm.startoff) {
+                       /* Extent begins before bno: drop the leading part. */
+                       bm.blockcount -= bno - bm.startoff;
+                       bm.startblock += bno - bm.startoff;
+                       bm.startoff = bno;
+               }
+               /* Extent runs past the requested range: drop the tail. */
+               if (bm.startoff + bm.blockcount > end)
+                       bm.blockcount = end - bm.startoff;
+               /*
+                * NOTE(review): bp is passed by address; presumably
+                * read_bbs() reads into the caller's buffer when *bufp is
+                * non-NULL -- confirm in io.c.
+                */
+               i = read_bbs(XFS_FSB_TO_DADDR(mp, bm.startblock),
+                            (int)XFS_FSB_TO_BB(mp, bm.blockcount),
+                            (void **)&bp, NULL);
+               if (i)
+                       return i;
+               bp += XFS_FSB_TO_B(mp, bm.blockcount);
+               bno += bm.blockcount;
+       }
+       return 0;
+}
diff --git a/db/dbread.h b/db/dbread.h
new file mode 100644 (file)
index 0000000..cab3397
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Read nblocks filesystem blocks of fork whichfork, starting at file block
+ * bno, into buf; unmapped ranges are zero-filled.  Returns 0 on success or
+ * a non-zero read error.
+ */
+extern int     dbread(void *buf, int nblocks, xfs_fileoff_t bno,
+                      int whichfork);
diff --git a/db/debug.c b/db/debug.c
new file mode 100644 (file)
index 0000000..093079d
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "debug.h"
+#include "output.h"
+
+/* Handler for the "debug" command, defined below. */
+static int     debug_f(int argc, char **argv);
+
+/* Command table entry for "debug": zero or one argument, no help callback. */
+static const cmdinfo_t debug_cmd =
+       { "debug", NULL, debug_f, 0, 1, 0, "[flagbits]",
+         "set debug option bits", NULL };
+
+/* Current debug option value; set and printed by debug_f(). */
+long   debug_state;
+
+/*
+ * Handler for the "debug" command.
+ *
+ * With an argument, parse it as a number (base chosen by strtol with base
+ * 0) and store it in debug_state.  In either case print the current value.
+ * An argument that is empty or has trailing non-numeric characters is
+ * rejected with a message and leaves debug_state unchanged.
+ *
+ * Always returns 0.
+ */
+static int
+debug_f(
+       int     argc,
+       char    **argv)
+{
+       char    *p;
+       long    val;
+
+       if (argc > 1) {
+               /*
+                * Parse into a local so that a rejected argument does not
+                * clobber the value currently in effect.
+                */
+               val = strtol(argv[1], &p, 0);
+               if (p == argv[1] || *p != '\0') {
+                       dbprintf("bad value for debug %s\n", argv[1]);
+                       return 0;
+               }
+               debug_state = val;
+       }
+       dbprintf("debug = %ld\n", debug_state);
+       return 0;
+}
+
+/* Register the "debug" command in the command table. */
+void
+debug_init(void)
+{
+       add_command(&debug_cmd);
+}
diff --git a/db/debug.h b/db/debug.h
new file mode 100644 (file)
index 0000000..1224b59
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* NOTE(review): presumably a flag bit tested against debug_state -- confirm at the use sites. */
+#define        DEBUG_FLIST     0x1
+
+/* Current debug option value, set by the "debug" command (debug.c). */
+extern long    debug_state;
+/* Register the "debug" command; called from init_commands(). */
+extern void    debug_init(void);
diff --git a/db/dir.c b/db/dir.c
new file mode 100644 (file)
index 0000000..32b03fa
--- /dev/null
+++ b/db/dir.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "dir.h"
+#include "io.h"
+#include "data.h"
+#include "mount.h"
+
+/*
+ * Forward declarations of the count/offset callbacks that the field tables
+ * below refer to by name.
+ */
+static int     dir_leaf_entries_count(void *obj, int startoff);
+static int     dir_leaf_hdr_count(void *obj, int startoff);
+static int     dir_leaf_name_count(void *obj, int startoff);
+static int     dir_leaf_namelist_count(void *obj, int startoff);
+static int     dir_leaf_namelist_offset(void *obj, int startoff, int idx);
+static int     dir_node_btree_count(void *obj, int startoff);
+static int     dir_node_hdr_count(void *obj, int startoff);
+
+/*
+ * One unnamed FLDT_DIR entry declared with OI(0) and C1, followed by the
+ * NULL terminator.  (field_t itself is declared outside this file.)
+ */
+const field_t  dir_hfld[] = {
+       { "", FLDT_DIR, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Fields of a directory block, described both as a leaf block
+ * (xfs_dir_leafblock_t, via LOFF) and as a node block (xfs_da_intnode_t,
+ * via NOFF).  Every entry has a count callback; those callbacks return 0
+ * when the block's magic number does not match their view, so only the
+ * matching view yields fields.  "namelist" also computes its offset through
+ * a callback (FLD_OFFSET).
+ */
+#define        LOFF(f) bitize(offsetof(xfs_dir_leafblock_t, f))
+#define        NOFF(f) bitize(offsetof(xfs_da_intnode_t, f))
+const field_t  dir_flds[] = {
+       { "lhdr", FLDT_DIR_LEAF_HDR, OI(LOFF(hdr)), dir_leaf_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir_node_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "entries", FLDT_DIR_LEAF_ENTRY, OI(LOFF(entries)),
+         dir_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "btree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)),
+         dir_node_btree_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "namelist", FLDT_DIR_LEAF_NAME, dir_leaf_namelist_offset,
+         dir_leaf_namelist_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_da_blkinfo_t; BOFF() gives each member's offset passed
+ * through bitize().  "pad" is flagged FLD_SKIPALL.
+ */
+#define        BOFF(f) bitize(offsetof(xfs_da_blkinfo_t, f))
+const field_t  dir_blkinfo_flds[] = {
+       { "forw", FLDT_DIRBLOCK, OI(BOFF(forw)), C1, 0, TYP_INODATA },
+       { "back", FLDT_DIRBLOCK, OI(BOFF(back)), C1, 0, TYP_INODATA },
+       { "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE },
+       { "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir_leaf_entry_t; LEOFF() gives each member's offset
+ * passed through bitize().  "pad2" is flagged FLD_SKIPALL.
+ */
+#define        LEOFF(f)        bitize(offsetof(xfs_dir_leaf_entry_t, f))
+const field_t  dir_leaf_entry_flds[] = {
+       { "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+       { "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE },
+       { "namelen", FLDT_UINT8D, OI(LEOFF(namelen)), C1, 0, TYP_NONE },
+       { "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir_leaf_hdr_t; LHOFF() gives each member's offset passed
+ * through bitize().  "freemap" is an array (FLD_ARRAY) declared with
+ * CI(XFS_DIR_LEAF_MAPSIZE); "pad1" is flagged FLD_SKIPALL.
+ */
+#define        LHOFF(f)        bitize(offsetof(xfs_dir_leaf_hdr_t, f))
+const field_t  dir_leaf_hdr_flds[] = {
+       { "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
+       { "namebytes", FLDT_UINT16D, OI(LHOFF(namebytes)), C1, 0, TYP_NONE },
+       { "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE },
+       { "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE },
+       { "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE },
+       { "freemap", FLDT_DIR_LEAF_MAP, OI(LHOFF(freemap)),
+         CI(XFS_DIR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir_leaf_map_t ("base" and "size"); LMOFF() gives each
+ * member's offset passed through bitize().
+ */
+#define        LMOFF(f)        bitize(offsetof(xfs_dir_leaf_map_t, f))
+const field_t  dir_leaf_map_flds[] = {
+       { "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE },
+       { "size", FLDT_UINT16D, OI(LMOFF(size)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir_leaf_name_t.  The length of "name" is not fixed: it
+ * is supplied by dir_leaf_name_count() (FLD_COUNT).
+ */
+#define        LNOFF(f)        bitize(offsetof(xfs_dir_leaf_name_t, f))
+const field_t  dir_leaf_name_flds[] = {
+       { "inumber", FLDT_DIR_INO, OI(LNOFF(inumber)), C1, 0, TYP_INODE },
+       { "name", FLDT_CHARNS, OI(LNOFF(name)), dir_leaf_name_count, FLD_COUNT,
+         TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_da_node_entry_t ("hashval" and "before"); EOFF() gives
+ * each member's offset passed through bitize().
+ */
+#define        EOFF(f) bitize(offsetof(xfs_da_node_entry_t, f))
+const field_t  dir_node_entry_flds[] = {
+       { "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE },
+       { "before", FLDT_DIRBLOCK, OI(EOFF(before)), C1, 0, TYP_INODATA },
+       { NULL }
+};
+
+/*
+ * Members of xfs_da_node_hdr_t ("info", "count", "level"); HOFF() gives
+ * each member's offset passed through bitize().
+ */
+#define        HOFF(f) bitize(offsetof(xfs_da_node_hdr_t, f))
+const field_t  dir_node_hdr_flds[] = {
+       { "info", FLDT_DIR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Count callback for the "entries" array: the header's entry count when the
+ * block's magic is XFS_DIR_LEAF_MAGIC, 0 for any other block.
+ */
+/*ARGSUSED*/
+static int
+dir_leaf_entries_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC)
+               return INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Count callback for "lhdr": 1 when the block's magic is
+ * XFS_DIR_LEAF_MAGIC, otherwise 0.
+ */
+/*ARGSUSED*/
+static int
+dir_leaf_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+       int                     is_leaf;
+
+       ASSERT(startoff == 0);
+       is_leaf = INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) ==
+                 XFS_DIR_LEAF_MAGIC;
+       return is_leaf;
+}
+
+/*
+ * Count callback for "name": startoff is the bit offset of a name within
+ * the leaf block.  The entry table is searched for the entry whose nameidx
+ * equals that offset in bytes, and its namelen is returned.  Returns 0 when
+ * the block is not a leaf block or no entry matches.
+ */
+static int
+dir_leaf_name_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+       xfs_dir_leaf_entry_t    *ent;
+       int                     byteoff;
+       int                     n;
+
+       ASSERT(bitoffs(startoff) == 0);
+       byteoff = byteize(startoff);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
+               return 0;
+       n = 0;
+       while (n < INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+               ent = &leaf->entries[n];
+               if (INT_GET(ent->nameidx, ARCH_CONVERT) == byteoff)
+                       return ent->namelen;
+               n++;
+       }
+       return 0;
+}
+
+/*
+ * Size in bits of the name structure belonging to leaf entry idx, computed
+ * with XFS_DIR_LEAF_ENTSIZE_BYENTRY().  Returns 0 when the block's magic is
+ * not XFS_DIR_LEAF_MAGIC.
+ */
+/*ARGSUSED*/
+int
+dir_leaf_name_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+       xfs_dir_leaf_entry_t    *ent;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+               ent = leaf->entries + idx;
+               return bitize((int)XFS_DIR_LEAF_ENTSIZE_BYENTRY(ent));
+       }
+       return 0;
+}
+
+/*
+ * Count callback for "namelist": the header's entry count for a block with
+ * magic XFS_DIR_LEAF_MAGIC, 0 otherwise.
+ */
+/*ARGSUSED*/
+static int
+dir_leaf_namelist_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+       int                     is_leaf;
+
+       ASSERT(startoff == 0);
+       is_leaf = INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) ==
+                 XFS_DIR_LEAF_MAGIC;
+       if (!is_leaf)
+               return 0;
+       return INT_GET(leaf->hdr.count, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for "namelist": the bit offset of name idx, taken from
+ * the nameidx member of leaf entry idx.  The block magic is not checked
+ * here.
+ */
+/*ARGSUSED*/
+static int
+dir_leaf_namelist_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir_leafblock_t     *leaf = obj;
+       xfs_dir_leaf_entry_t    *ent;
+
+       ASSERT(startoff == 0);
+       ent = leaf->entries + idx;
+       return bitize(INT_GET(ent->nameidx, ARCH_CONVERT));
+}
+
+/*
+ * Count callback for "btree": the node header's count when the block's
+ * magic is XFS_DA_NODE_MAGIC, 0 for any other block.
+ */
+/*ARGSUSED*/
+static int
+dir_node_btree_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_da_intnode_t        *node = obj;
+
+       ASSERT(startoff == 0);          /* this is a base structure */
+       if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+               return INT_GET(node->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Count callback for "nhdr": 1 when the block's magic is
+ * XFS_DA_NODE_MAGIC, otherwise 0.
+ */
+/*ARGSUSED*/
+static int
+dir_node_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_da_intnode_t        *node = obj;
+       int                     is_node;
+
+       ASSERT(startoff == 0);
+       is_node = INT_GET(node->hdr.info.magic, ARCH_CONVERT) ==
+                 XFS_DA_NODE_MAGIC;
+       return is_node;
+}
+
+/*
+ * Size callback: the superblock block size (mp->m_sb.sb_blocksize) passed
+ * through bitize().  None of the arguments are used.
+ */
+/*ARGSUSED*/
+int
+dir_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       int     nbits;
+
+       nbits = bitize(mp->m_sb.sb_blocksize);
+       return nbits;
+}
diff --git a/db/dir.h b/db/dir.h
new file mode 100644 (file)
index 0000000..ee28b44
--- /dev/null
+++ b/db/dir.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Field tables defined in dir.c; each one ends with a { NULL } entry. */
+extern const field_t   dir_flds[];
+extern const field_t   dir_hfld[];
+extern const field_t   dir_blkinfo_flds[];
+extern const field_t   dir_leaf_entry_flds[];
+extern const field_t   dir_leaf_hdr_flds[];
+extern const field_t   dir_leaf_map_flds[];
+extern const field_t   dir_leaf_name_flds[];
+extern const field_t   dir_node_entry_flds[];
+extern const field_t   dir_node_hdr_flds[];
+
+/* Size helpers defined in dir.c; both return a value produced by bitize(). */
+extern int     dir_leaf_name_size(void *obj, int startoff, int idx);
+extern int     dir_size(void *obj, int startoff, int idx);
diff --git a/db/dir2.c b/db/dir2.c
new file mode 100644 (file)
index 0000000..b8b440c
--- /dev/null
+++ b/db/dir2.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "bit.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "dir.h"
+#include "dir2.h"
+#include "mount.h"
+#include "data.h"
+
+/*
+ * Forward declarations of the count/offset callbacks that the field tables
+ * below refer to by name.
+ */
+static int     dir2_block_hdr_count(void *obj, int startoff);
+static int     dir2_block_leaf_count(void *obj, int startoff);
+static int     dir2_block_leaf_offset(void *obj, int startoff, int idx);
+static int     dir2_block_tail_count(void *obj, int startoff);
+static int     dir2_block_tail_offset(void *obj, int startoff, int idx);
+static int     dir2_block_u_count(void *obj, int startoff);
+static int     dir2_block_u_offset(void *obj, int startoff, int idx);
+static int     dir2_data_union_freetag_count(void *obj, int startoff);
+static int     dir2_data_union_inumber_count(void *obj, int startoff);
+static int     dir2_data_union_length_count(void *obj, int startoff);
+static int     dir2_data_union_name_count(void *obj, int startoff);
+static int     dir2_data_union_namelen_count(void *obj, int startoff);
+static int     dir2_data_union_tag_count(void *obj, int startoff);
+static int     dir2_data_union_tag_offset(void *obj, int startoff, int idx);
+static int     dir2_data_hdr_count(void *obj, int startoff);
+static int     dir2_data_u_count(void *obj, int startoff);
+static int     dir2_data_u_offset(void *obj, int startoff, int idx);
+static int     dir2_free_bests_count(void *obj, int startoff);
+static int     dir2_free_hdr_count(void *obj, int startoff);
+static int     dir2_leaf_bests_count(void *obj, int startoff);
+static int     dir2_leaf_bests_offset(void *obj, int startoff, int idx);
+static int     dir2_leaf_ents_count(void *obj, int startoff);
+static int     dir2_leaf_hdr_count(void *obj, int startoff);
+static int     dir2_leaf_tail_count(void *obj, int startoff);
+static int     dir2_leaf_tail_offset(void *obj, int startoff, int idx);
+static int     dir2_node_btree_count(void *obj, int startoff);
+static int     dir2_node_hdr_count(void *obj, int startoff);
+
+/*
+ * One unnamed FLDT_DIR2 entry declared with OI(0) and C1, followed by the
+ * NULL terminator.  (field_t itself is declared outside this file.)
+ */
+const field_t  dir2_hfld[] = {
+       { "", FLDT_DIR2, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Fields of a dir2 block under each of its possible layouts: block
+ * ("b..."), data ("d..."), leaf ("l..."), node ("n...") and free ("f...").
+ * Every entry carries a count callback (FLD_COUNT); entries flagged
+ * FLD_OFFSET also compute their position through a callback instead of a
+ * fixed offsetof()-based value.  The count callbacks visible in this file
+ * return 0 when the block's magic does not match their layout.
+ */
+#define        BOFF(f) bitize(offsetof(xfs_dir2_block_t, f))
+#define        DOFF(f) bitize(offsetof(xfs_dir2_data_t, f))
+#define        FOFF(f) bitize(offsetof(xfs_dir2_free_t, f))
+#define        LOFF(f) bitize(offsetof(xfs_dir2_leaf_t, f))
+#define        NOFF(f) bitize(offsetof(xfs_da_intnode_t, f))
+const field_t  dir2_flds[] = {
+       { "bhdr", FLDT_DIR2_DATA_HDR, OI(BOFF(hdr)), dir2_block_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "bu", FLDT_DIR2_DATA_UNION, dir2_block_u_offset, dir2_block_u_count,
+         FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "bleaf", FLDT_DIR2_LEAF_ENTRY, dir2_block_leaf_offset,
+         dir2_block_leaf_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "btail", FLDT_DIR2_BLOCK_TAIL, dir2_block_tail_offset,
+         dir2_block_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "dhdr", FLDT_DIR2_DATA_HDR, OI(DOFF(hdr)), dir2_data_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "du", FLDT_DIR2_DATA_UNION, dir2_data_u_offset, dir2_data_u_count,
+         FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "lhdr", FLDT_DIR2_LEAF_HDR, OI(LOFF(hdr)), dir2_leaf_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "lbests", FLDT_DIR2_DATA_OFF, dir2_leaf_bests_offset,
+         dir2_leaf_bests_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "lents", FLDT_DIR2_LEAF_ENTRY, OI(LOFF(ents)), dir2_leaf_ents_count,
+         FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "ltail", FLDT_DIR2_LEAF_TAIL, dir2_leaf_tail_offset,
+         dir2_leaf_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir2_node_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "nbtree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)), dir2_node_btree_count,
+         FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { "fhdr", FLDT_DIR2_FREE_HDR, OI(FOFF(hdr)), dir2_free_hdr_count,
+         FLD_COUNT, TYP_NONE },
+       { "fbests", FLDT_DIR2_DATA_OFFNZ, OI(FOFF(bests)),
+         dir2_free_bests_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_block_tail_t ("count" and "stale"); BTOFF() gives
+ * each member's offset passed through bitize().
+ */
+#define        BTOFF(f)        bitize(offsetof(xfs_dir2_block_tail_t, f))
+const field_t  dir2_block_tail_flds[] = {
+       { "count", FLDT_UINT32D, OI(BTOFF(count)), C1, 0, TYP_NONE },
+       { "stale", FLDT_UINT32D, OI(BTOFF(stale)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_data_free_t ("offset" and "length"); DFOFF() gives
+ * each member's offset passed through bitize().
+ */
+#define        DFOFF(f)        bitize(offsetof(xfs_dir2_data_free_t, f))
+const field_t  dir2_data_free_flds[] = {
+       { "offset", FLDT_DIR2_DATA_OFF, OI(DFOFF(offset)), C1, 0, TYP_NONE },
+       { "length", FLDT_DIR2_DATA_OFF, OI(DFOFF(length)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_data_hdr_t.  "bestfree" is an array (FLD_ARRAY)
+ * declared with CI(XFS_DIR2_DATA_FD_COUNT).
+ */
+#define        DHOFF(f)        bitize(offsetof(xfs_dir2_data_hdr_t, f))
+const field_t  dir2_data_hdr_flds[] = {
+       { "magic", FLDT_UINT32X, OI(DHOFF(magic)), C1, 0, TYP_NONE },
+       { "bestfree", FLDT_DIR2_DATA_FREE, OI(DHOFF(bestfree)),
+         CI(XFS_DIR2_DATA_FD_COUNT), FLD_ARRAY, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Fields of one record in the data area, which is either an in-use entry
+ * (xfs_dir2_data_entry_t, via DEOFF) or an unused region
+ * (xfs_dir2_data_unused_t, via DUOFF).  The count callbacks choose between
+ * the two by comparing freetag with XFS_DIR2_DATA_FREE_TAG: "freetag" and
+ * "length" appear only for unused regions, "inumber", "namelen" and "name"
+ * only for in-use entries; "tag" is located by callback for both.
+ */
+#define        DEOFF(f)        bitize(offsetof(xfs_dir2_data_entry_t, f))
+#define        DUOFF(f)        bitize(offsetof(xfs_dir2_data_unused_t, f))
+const field_t  dir2_data_union_flds[] = {
+       { "freetag", FLDT_UINT16X, OI(DUOFF(freetag)),
+         dir2_data_union_freetag_count, FLD_COUNT, TYP_NONE },
+       { "inumber", FLDT_INO, OI(DEOFF(inumber)),
+         dir2_data_union_inumber_count, FLD_COUNT, TYP_INODE },
+       { "length", FLDT_DIR2_DATA_OFF, OI(DUOFF(length)),
+         dir2_data_union_length_count, FLD_COUNT, TYP_NONE },
+       { "namelen", FLDT_UINT8D, OI(DEOFF(namelen)),
+         dir2_data_union_namelen_count, FLD_COUNT, TYP_NONE },
+       { "name", FLDT_CHARNS, OI(DEOFF(name)), dir2_data_union_name_count,
+         FLD_COUNT, TYP_NONE },
+       { "tag", FLDT_DIR2_DATA_OFF, dir2_data_union_tag_offset,
+         dir2_data_union_tag_count, FLD_OFFSET|FLD_COUNT, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_leaf_entry_t ("hashval" and "address"); LEOFF()
+ * gives each member's offset passed through bitize().
+ */
+#define        LEOFF(f)        bitize(offsetof(xfs_dir2_leaf_entry_t, f))
+const field_t  dir2_leaf_entry_flds[] = {
+       { "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
+       { "address", FLDT_UINT32X, OI(LEOFF(address)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_leaf_hdr_t ("info", "count", "stale"); LHOFF() gives
+ * each member's offset passed through bitize().
+ */
+#define        LHOFF(f)        bitize(offsetof(xfs_dir2_leaf_hdr_t, f))
+const field_t  dir2_leaf_hdr_flds[] = {
+       { "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
+       { "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
+       { "stale", FLDT_UINT16D, OI(LHOFF(stale)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * The single member of xfs_dir2_leaf_tail_t shown here, "bestcount";
+ * LTOFF() gives its offset passed through bitize().
+ */
+#define        LTOFF(f)        bitize(offsetof(xfs_dir2_leaf_tail_t, f))
+const field_t  dir2_leaf_tail_flds[] = {
+       { "bestcount", FLDT_UINT32D, OI(LTOFF(bestcount)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Members of xfs_dir2_free_hdr_t ("magic", "firstdb", "nvalid", "nused");
+ * FHOFF() gives each member's offset passed through bitize().
+ */
+#define        FHOFF(f)        bitize(offsetof(xfs_dir2_free_hdr_t, f))
+const field_t  dir2_free_hdr_flds[] = {
+       { "magic", FLDT_UINT32X, OI(FHOFF(magic)), C1, 0, TYP_NONE },
+       { "firstdb", FLDT_INT32D, OI(FHOFF(firstdb)), C1, 0, TYP_NONE },
+       { "nvalid", FLDT_INT32D, OI(FHOFF(nvalid)), C1, 0, TYP_NONE },
+       { "nused", FLDT_INT32D, OI(FHOFF(nused)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Count callback for "bhdr": 1 when the block's magic is
+ * XFS_DIR2_BLOCK_MAGIC, otherwise 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_block_t        *blk = obj;
+       int                     is_block;
+
+       ASSERT(startoff == 0);
+       is_block = INT_GET(blk->hdr.magic, ARCH_CONVERT) ==
+                  XFS_DIR2_BLOCK_MAGIC;
+       return is_block;
+}
+
+/*
+ * Count callback for "bleaf": the count member of the block tail when the
+ * block's magic is XFS_DIR2_BLOCK_MAGIC, 0 for any other block.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_leaf_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_block_t        *blk = obj;
+       xfs_dir2_block_tail_t   *tail;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+               return INT_GET(tail->count, ARCH_CONVERT);
+       }
+       return 0;
+}
+
+/*
+ * Offset callback for "bleaf": bit offset, from the start of the block, of
+ * leaf entry idx.  The first leaf entry is located from the block tail
+ * with XFS_DIR2_BLOCK_LEAF_P_ARCH().  The block must have magic
+ * XFS_DIR2_BLOCK_MAGIC (asserted).
+ */
+/*ARGSUSED*/
+static int
+dir2_block_leaf_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_block_t        *blk = obj;
+       xfs_dir2_block_tail_t   *tail;
+       xfs_dir2_leaf_entry_t   *first;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+       first = XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+       return bitize((int)((char *)&first[idx] - (char *)blk));
+}
+
+/*
+ * Count callback for "btail": 1 when the block's magic is
+ * XFS_DIR2_BLOCK_MAGIC, otherwise 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_tail_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_block_t        *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)
+               return 0;
+       return 1;
+}
+
+/*
+ * Offset callback for "btail": bit offset, from the start of the block, of
+ * the tail structure found with XFS_DIR2_BLOCK_TAIL_P().  idx must be 0
+ * and the block must have magic XFS_DIR2_BLOCK_MAGIC (both asserted).
+ */
+/*ARGSUSED*/
+static int
+dir2_block_tail_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_block_t        *blk = obj;
+       xfs_dir2_block_tail_t   *tail;
+       int                     nbytes;
+
+       ASSERT(startoff == 0);
+       ASSERT(idx == 0);
+       ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+       nbytes = (int)((char *)tail - (char *)blk);
+       return bitize(nbytes);
+}
+
+/*
+ * Count callback for "bu": number of records (in-use entries and unused
+ * regions) between the start of block->u and the first leaf entry of a
+ * block-format directory block.  Returns 0 when the block's magic is not
+ * XFS_DIR2_BLOCK_MAGIC.
+ *
+ * Each record is stepped over by its own size: the length member for a
+ * record carrying XFS_DIR2_DATA_FREE_TAG, XFS_DIR2_DATA_ENTSIZE(namelen)
+ * otherwise.  An unused region whose length is 0 (possible only in a
+ * corrupt block) would never advance the walk, so the walk stops there;
+ * that record is still included in the returned count.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_u_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_block_t        *block;
+       xfs_dir2_block_tail_t   *btp;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_unused_t  *dup;
+       char                    *endptr;
+       int                     i;
+       int                     len;
+       char                    *ptr;
+
+       ASSERT(startoff == 0);
+       block = obj;
+       if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)
+               return 0;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       ptr = (char *)block->u;
+       endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       for (i = 0; ptr < endptr; i++) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       len = INT_GET(dup->length, ARCH_CONVERT);
+                       /* zero length cannot advance: avoid looping forever */
+                       if (len == 0)
+                               return i + 1;
+                       ptr += len;
+               } else {
+                       dep = (xfs_dir2_data_entry_t *)ptr;
+                       ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               }
+       }
+       return i;
+}
+
+/*
+ * Offset callback for "bu": bit offset, from the start of the block, of
+ * record idx in a block-format directory block.  Starting at block->u, idx
+ * records are stepped over, each by its own size (the length member for a
+ * record carrying XFS_DIR2_DATA_FREE_TAG, XFS_DIR2_DATA_ENTSIZE(namelen)
+ * otherwise).  The walk is asserted to stay below the first leaf entry.
+ */
+/*ARGSUSED*/
+static int
+dir2_block_u_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_block_t        *blk = obj;
+       xfs_dir2_block_tail_t   *tail;
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+                               /*REFERENCED*/
+       char                    *limit;
+       char                    *cur;
+       int                     left;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       tail = XFS_DIR2_BLOCK_TAIL_P(mp, blk);
+       cur = (char *)blk->u;
+       limit = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(tail, ARCH_CONVERT);
+       for (left = idx; left > 0; left--) {
+               ASSERT(cur < limit);
+               unused = (xfs_dir2_data_unused_t *)cur;
+               if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) {
+                       used = (xfs_dir2_data_entry_t *)cur;
+                       cur += XFS_DIR2_DATA_ENTSIZE(used->namelen);
+               } else
+                       cur += INT_GET(unused->length, ARCH_CONVERT);
+       }
+       return bitize((int)(cur - (char *)blk));
+}
+
+/*
+ * Count callback for "freetag": 1 when the freetag member of the record at
+ * bit offset startoff lies within the first mp->m_dirblksize bytes of obj
+ * and equals XFS_DIR2_DATA_FREE_TAG, otherwise 0.
+ */
+static int
+dir2_data_union_freetag_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *end;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)(base + byteize(startoff));
+       end = (char *)&unused->freetag + sizeof(unused->freetag);
+       if (end > base + mp->m_dirblksize)
+               return 0;
+       return INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG;
+}
+
+/*
+ * Count callback for "inumber": 1 when the inumber member of the record at
+ * bit offset startoff lies within the first mp->m_dirblksize bytes of obj
+ * and the record does not carry XFS_DIR2_DATA_FREE_TAG, otherwise 0.
+ */
+static int
+dir2_data_union_inumber_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *end;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)(base + byteize(startoff));
+       used = (xfs_dir2_data_entry_t *)unused;
+       end = (char *)&used->inumber + sizeof(used->inumber);
+       if (end > base + mp->m_dirblksize)
+               return 0;
+       return INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG;
+}
+
+/*
+ * Count callback for "length": 1 when the length member of the record at
+ * bit offset startoff lies within the first mp->m_dirblksize bytes of obj
+ * and the record carries XFS_DIR2_DATA_FREE_TAG, otherwise 0.
+ */
+static int
+dir2_data_union_length_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *end;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)(base + byteize(startoff));
+       end = (char *)&unused->length + sizeof(unused->length);
+       if (end > base + mp->m_dirblksize)
+               return 0;
+       return INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG;
+}
+
+/*
+ * Count callback for "name": the entry's namelen, provided the record at
+ * bit offset startoff is not an unused region and the whole name fits
+ * within the first mp->m_dirblksize bytes of obj.  Returns 0 otherwise.
+ */
+static int
+dir2_data_union_name_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *end;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)(base + byteize(startoff));
+       used = (xfs_dir2_data_entry_t *)unused;
+       end = (char *)&used->namelen + sizeof(used->namelen);
+       /* namelen must end strictly before the limit for a name to follow */
+       if (end >= base + mp->m_dirblksize)
+               return 0;
+       if (INT_GET(unused->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG)
+               return 0;
+       end = (char *)&used->name[0] + used->namelen;
+       if (end > base + mp->m_dirblksize)
+               return 0;
+       return used->namelen;
+}
+
+/*
+ * Count callback for "namelen": 1 when the namelen member of the record at
+ * bit offset startoff lies within the first mp->m_dirblksize bytes of obj
+ * and the record does not carry XFS_DIR2_DATA_FREE_TAG, otherwise 0.
+ */
+static int
+dir2_data_union_namelen_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *end;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)(base + byteize(startoff));
+       used = (xfs_dir2_data_entry_t *)unused;
+       end = (char *)&used->namelen + sizeof(used->namelen);
+       if (end > base + mp->m_dirblksize)
+               return 0;
+       return INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG;
+}
+
+/*
+ * Count callback for "tag": 1 when the trailing tag of the record at bit
+ * offset startoff lies within the first mp->m_dirblksize bytes of obj,
+ * otherwise 0.  Before the tag is located, the members needed to find it
+ * are bounds-checked the same way: freetag first, then length for an
+ * unused region or namelen for an in-use entry.
+ */
+static int
+dir2_data_union_tag_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+       char                    *limit;
+       xfs_dir2_data_off_t     *tagp;
+
+       ASSERT(bitoffs(startoff) == 0);
+       unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+       used = (xfs_dir2_data_entry_t *)unused;
+       limit = (char *)obj + mp->m_dirblksize;
+       if ((char *)&unused->freetag + sizeof(unused->freetag) > limit)
+               return 0;
+       if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) {
+               if ((char *)&used->namelen + sizeof(used->namelen) > limit)
+                       return 0;
+               tagp = XFS_DIR2_DATA_ENTRY_TAG_P(used);
+       } else {
+               if ((char *)&unused->length + sizeof(unused->length) > limit)
+                       return 0;
+               tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(unused, ARCH_CONVERT);
+       }
+       return (char *)tagp + sizeof(*tagp) <= limit;
+}
+
+/*
+ * Offset callback for "tag": bit offset of the trailing tag relative to
+ * the start of the record at bit offset startoff.  The tag is located with
+ * XFS_DIR2_DATA_UNUSED_TAG_P_ARCH() for a record carrying
+ * XFS_DIR2_DATA_FREE_TAG and with XFS_DIR2_DATA_ENTRY_TAG_P() otherwise.
+ * idx must be 0 (asserted).
+ */
+/*ARGSUSED*/
+static int
+dir2_data_union_tag_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       unused = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff));
+       if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) {
+               used = (xfs_dir2_data_entry_t *)unused;
+               return bitize((int)((char *)XFS_DIR2_DATA_ENTRY_TAG_P(used) -
+                                   (char *)used));
+       }
+       return bitize((int)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(unused, ARCH_CONVERT) -
+                           (char *)unused));
+}
+
+/*
+ * Count callback for "dhdr": 1 when the block's magic is
+ * XFS_DIR2_DATA_MAGIC, otherwise 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_data_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_data_t         *blk = obj;
+       int                     is_data;
+
+       ASSERT(startoff == 0);
+       is_data = INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC;
+       return is_data;
+}
+
+/*
+ * Count callback for "du": number of records (in-use entries and unused
+ * regions) between the start of data->u and mp->m_dirblksize bytes from
+ * the start of the block.  Returns 0 when the block's magic is not
+ * XFS_DIR2_DATA_MAGIC.
+ *
+ * Each record is stepped over by its own size: the length member for a
+ * record carrying XFS_DIR2_DATA_FREE_TAG, XFS_DIR2_DATA_ENTSIZE(namelen)
+ * otherwise.  An unused region whose length is 0 (possible only in a
+ * corrupt block) would never advance the walk, so the walk stops there;
+ * that record is still included in the returned count.
+ */
+/*ARGSUSED*/
+static int
+dir2_data_u_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_data_t         *data;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_unused_t  *dup;
+       char                    *endptr;
+       int                     i;
+       int                     len;
+       char                    *ptr;
+
+       ASSERT(startoff == 0);
+       data = obj;
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC)
+               return 0;
+       ptr = (char *)data->u;
+       endptr = (char *)data + mp->m_dirblksize;
+       for (i = 0; ptr < endptr; i++) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       len = INT_GET(dup->length, ARCH_CONVERT);
+                       /* zero length cannot advance: avoid looping forever */
+                       if (len == 0)
+                               return i + 1;
+                       ptr += len;
+               } else {
+                       dep = (xfs_dir2_data_entry_t *)ptr;
+                       ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               }
+       }
+       return i;
+}
+
+/*
+ * Offset callback for "du": bit offset, from the start of the block, of
+ * record idx in a data block.  Starting at data->u, idx records are
+ * stepped over, each by its own size (the length member for a record
+ * carrying XFS_DIR2_DATA_FREE_TAG, XFS_DIR2_DATA_ENTSIZE(namelen)
+ * otherwise).  The walk is asserted to stay below mp->m_dirblksize bytes
+ * from the start of the block.
+ */
+/*ARGSUSED*/
+static int
+dir2_data_u_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_data_t         *blk = obj;
+       xfs_dir2_data_entry_t   *used;
+       xfs_dir2_data_unused_t  *unused;
+                               /*REFERENCED*/
+       char                    *limit;
+       char                    *cur;
+       int                     left;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+       cur = (char *)blk->u;
+       limit = (char *)blk + mp->m_dirblksize;
+       for (left = idx; left > 0; left--) {
+               ASSERT(cur < limit);
+               unused = (xfs_dir2_data_unused_t *)cur;
+               if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) {
+                       used = (xfs_dir2_data_entry_t *)cur;
+                       cur += XFS_DIR2_DATA_ENTSIZE(used->namelen);
+               } else
+                       cur += INT_GET(unused->length, ARCH_CONVERT);
+       }
+       return bitize((int)(cur - (char *)blk));
+}
+
+/*
+ * Size of the single data-block entry found at startoff within obj.
+ *
+ * startoff must have no sub-byte component and idx must be 0 (both
+ * asserted).  If the entry carries XFS_DIR2_DATA_FREE_TAG its recorded
+ * length is used; otherwise the size is computed from the entry's name
+ * length.  The result is passed through bitize().
+ */
+/*ARGSUSED*/
+int
+dir2_data_union_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       char                    *entry;
+       xfs_dir2_data_unused_t  *unused;
+       xfs_dir2_data_entry_t   *used;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       entry = (char *)obj + byteize(startoff);
+       unused = (xfs_dir2_data_unused_t *)entry;
+       if (INT_GET(unused->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) {
+               used = (xfs_dir2_data_entry_t *)unused;
+               return bitize(XFS_DIR2_DATA_ENTSIZE(used->namelen));
+       }
+       return bitize(INT_GET(unused->length, ARCH_CONVERT));
+}
+
+/*
+ * Number of "bests" entries in a dir2 free block: hdr.nvalid when the
+ * block's magic is XFS_DIR2_FREE_MAGIC, otherwise 0.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_free_bests_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_free_t         *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC)
+               return INT_GET(blk->hdr.nvalid, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Returns 1 when obj carries XFS_DIR2_FREE_MAGIC (a free-block header is
+ * present), 0 otherwise.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_free_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_free_t         *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC)
+               return 1;
+       return 0;
+}
+
+/*
+ * Number of "bests" entries in a dir2 leaf block: the tail's bestcount
+ * when the block's magic is XFS_DIR2_LEAF1_MAGIC, otherwise 0.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_bests_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+       xfs_dir2_leaf_tail_t    *tail;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC) {
+               tail = XFS_DIR2_LEAF_TAIL_P(mp, blk);
+               return INT_GET(tail->bestcount, ARCH_CONVERT);
+       }
+       return 0;
+}
+
+/*
+ * Position of "bests" slot idx in a dir2 leaf block, measured from the
+ * start of the block and passed through bitize().  startoff must be 0 and
+ * the block's magic must be XFS_DIR2_LEAF1_MAGIC (both asserted).
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_bests_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+       xfs_dir2_data_off_t     *bests;
+       xfs_dir2_leaf_tail_t    *tail;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       tail = XFS_DIR2_LEAF_TAIL_P(mp, blk);
+       bests = XFS_DIR2_LEAF_BESTS_P_ARCH(tail, ARCH_CONVERT);
+       return bitize((int)((char *)&bests[idx] - (char *)blk));
+}
+
+/*
+ * Number of leaf entries in a dir2 leaf block: hdr.count when the block's
+ * magic is XFS_DIR2_LEAF1_MAGIC or XFS_DIR2_LEAFN_MAGIC, otherwise 0.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_ents_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+           INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC)
+               return INT_GET(blk->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Returns 1 when obj carries either dir2 leaf magic
+ * (XFS_DIR2_LEAF1_MAGIC or XFS_DIR2_LEAFN_MAGIC), 0 otherwise.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC)
+               return 1;
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC)
+               return 1;
+       return 0;
+}
+
+/*
+ * Returns 1 when obj carries XFS_DIR2_LEAF1_MAGIC (the only leaf magic
+ * for which this file reads a tail), 0 otherwise.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_tail_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC)
+               return 1;
+       return 0;
+}
+
+/*
+ * Position of the leaf tail inside a dir2 leaf block, measured from the
+ * start of the block and passed through bitize().  startoff and idx must
+ * both be 0 and the magic must be XFS_DIR2_LEAF1_MAGIC (all asserted).
+ */
+/*ARGSUSED*/
+static int
+dir2_leaf_tail_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_leaf_t         *blk = obj;
+       char                    *tail;
+
+       ASSERT(startoff == 0);
+       ASSERT(idx == 0);
+       ASSERT(INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       tail = (char *)XFS_DIR2_LEAF_TAIL_P(mp, blk);
+       return bitize((int)(tail - (char *)blk));
+}
+
+/*
+ * Number of btree entries in a da node block: hdr.count when the block's
+ * magic is XFS_DA_NODE_MAGIC, otherwise 0.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_node_btree_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_da_intnode_t        *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+               return INT_GET(blk->hdr.count, ARCH_CONVERT);
+       return 0;
+}
+
+/*
+ * Returns 1 when obj carries XFS_DA_NODE_MAGIC (a node header is
+ * present), 0 otherwise.  startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_node_hdr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_da_intnode_t        *blk = obj;
+
+       ASSERT(startoff == 0);
+       if (INT_GET(blk->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)
+               return 1;
+       return 0;
+}
+
+/*
+ * Size of a dir2 block: always mp->m_dirblksize, passed through bitize().
+ * None of the three arguments is used.
+ */
+/*ARGSUSED*/
+int
+dir2_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       return bitize(mp->m_dirblksize);
+}
diff --git a/db/dir2.h b/db/dir2.h
new file mode 100644 (file)
index 0000000..a1516c9
--- /dev/null
+++ b/db/dir2.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Field tables for the dir2 block structures.  The definitions are
+ * presumably in dir2.c -- only part of that file is visible here.
+ */
+extern const field_t   dir2_flds[];
+extern const field_t   dir2_hfld[];
+extern const field_t   dir2_block_tail_flds[];
+extern const field_t   dir2_data_free_flds[];
+extern const field_t   dir2_data_hdr_flds[];
+extern const field_t   dir2_data_union_flds[];
+extern const field_t   dir2_free_hdr_flds[];
+extern const field_t   dir2_leaf_entry_flds[];
+extern const field_t   dir2_leaf_hdr_flds[];
+extern const field_t   dir2_leaf_tail_flds[];
+
+/*
+ * Size callbacks.  Both take (object, start offset, index) and return a
+ * size that has been passed through bitize().
+ */
+extern int     dir2_data_union_size(void *obj, int startoff, int idx);
+extern int     dir2_size(void *obj, int startoff, int idx);
diff --git a/db/dir2sf.c b/db/dir2sf.c
new file mode 100644 (file)
index 0000000..9d8c35f
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "dir2sf.h"
+
+/* Count/offset callbacks referenced by the field tables below. */
+static int     dir2_inou_i4_count(void *obj, int startoff);
+static int     dir2_inou_i8_count(void *obj, int startoff);
+static int     dir2_sf_entry_inumber_offset(void *obj, int startoff, int idx);
+static int     dir2_sf_entry_name_count(void *obj, int startoff);
+static int     dir2_sf_list_count(void *obj, int startoff);
+static int     dir2_sf_list_offset(void *obj, int startoff, int idx);
+
+/*
+ * Each table below is terminated by a { NULL } entry.  Judging from the
+ * initializers, an entry is: name, field type (FLDT_*), offset (a constant
+ * wrapped in OI() or a callback), count (C1 or a callback), FLD_* flags,
+ * and a TYP_* value -- confirm against the field_t definition in field.h.
+ */
+
+/* Shortform dir2 directory: a header followed by a variable-size list. */
+#define        OFF(f)  bitize(offsetof(xfs_dir2_sf_t, f))
+const field_t  dir2sf_flds[] = {
+       { "hdr", FLDT_DIR2_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+       { "list", FLDT_DIR2_SF_ENTRY, dir2_sf_list_offset, dir2_sf_list_count,
+         FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Inode-number union: the i8 and i4 count callbacks are complementary
+ * (they test hdr.i8count != 0 and == 0 respectively), so exactly one of
+ * the two members has a count of 1.
+ */
+#define UOFF(f)        bitize(offsetof(xfs_dir2_inou_t, f))
+const field_t  dir2_inou_flds[] = {
+       { "i8", FLDT_DIR2_INO8, OI(UOFF(i8)), dir2_inou_i8_count, FLD_COUNT,
+         TYP_INODE },
+       { "i4", FLDT_DIR2_INO4, OI(UOFF(i4)), dir2_inou_i4_count, FLD_COUNT,
+         TYP_INODE },
+       { NULL }
+};
+
+/* Shortform dir2 header. */
+#define        HOFF(f) bitize(offsetof(xfs_dir2_sf_hdr_t, f))
+const field_t  dir2_sf_hdr_flds[] = {
+       { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+       { "i8count", FLDT_UINT8D, OI(HOFF(i8count)), C1, 0, TYP_NONE },
+       { "parent", FLDT_DIR2_INOU, OI(HOFF(parent)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Shortform dir2 entry.  The name's count comes from namelen and the
+ * inode number's position is computed by a callback rather than taken
+ * from a fixed structure offset.
+ */
+#define        EOFF(f) bitize(offsetof(xfs_dir2_sf_entry_t, f))
+const field_t  dir2_sf_entry_flds[] = {
+       { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+       { "offset", FLDT_DIR2_SF_OFF, OI(EOFF(offset)), C1, 0, TYP_NONE },
+       { "name", FLDT_CHARNS, OI(EOFF(name)), dir2_sf_entry_name_count,
+         FLD_COUNT, TYP_NONE },
+       { "inumber", FLDT_DIR2_INOU, dir2_sf_entry_inumber_offset, C1,
+         FLD_OFFSET, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Count for the "i4" member of the inode-number union: 1 when the
+ * shortform header's i8count is zero, else 0.  obj is treated as an
+ * xfs_dinode_t and the shortform header is taken from its
+ * di_u.di_dir2sf member; startoff is only checked for byte alignment.
+ */
+/*ARGSUSED*/
+static int
+dir2_inou_i4_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *dip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       return dip->di_u.di_dir2sf.hdr.i8count ? 0 : 1;
+}
+
+/*
+ * Count for the "i8" member of the inode-number union: 1 when the
+ * shortform header's i8count is non-zero, else 0.  obj is treated as an
+ * xfs_dinode_t and the shortform header is taken from its
+ * di_u.di_dir2sf member; startoff is only checked for byte alignment.
+ */
+/*ARGSUSED*/
+static int
+dir2_inou_i8_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *dip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       return dip->di_u.di_dir2sf.hdr.i8count ? 1 : 0;
+}
+
+/*
+ * Size of the inode-number union: sizeof(xfs_dir2_ino8_t) when the
+ * shortform header's i8count is non-zero, sizeof(xfs_dir2_ino4_t)
+ * otherwise, passed through bitize().  obj is treated as an xfs_dinode_t;
+ * idx must be 0.
+ */
+/*ARGSUSED*/
+int
+dir2_inou_size(
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+       xfs_dinode_t    *dip = obj;
+       uint            nbytes;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       if (dip->di_u.di_dir2sf.hdr.i8count)
+               nbytes = (uint)sizeof(xfs_dir2_ino8_t);
+       else
+               nbytes = (uint)sizeof(xfs_dir2_ino4_t);
+       return bitize(nbytes);
+}
+
+/*
+ * Number of characters in the name of the shortform dir2 entry located
+ * at startoff within obj, i.e. that entry's namelen.
+ */
+static int
+dir2_sf_entry_name_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_sf_entry_t     *entry;
+
+       ASSERT(bitoffs(startoff) == 0);
+       entry = (xfs_dir2_sf_entry_t *)(base + byteize(startoff));
+       return entry->namelen;
+}
+
+/*
+ * Position of the inode number inside the shortform dir2 entry located
+ * at startoff within obj: the distance from the start of the entry to
+ * XFS_DIR2_SF_INUMBERP(entry), passed through bitize().  idx must be 0.
+ */
+/*ARGSUSED*/
+static int
+dir2_sf_entry_inumber_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_sf_entry_t     *entry;
+       char                    *inop;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       entry = (xfs_dir2_sf_entry_t *)((char *)obj + byteize(startoff));
+       inop = (char *)XFS_DIR2_SF_INUMBERP(entry);
+       return bitize((int)(inop - (char *)entry));
+}
+
+/*
+ * Size of entry number idx in the shortform dir2 directory located at
+ * startoff within obj.  Entries are variable-sized, so the list is walked
+ * from the first entry; the result is passed through bitize().
+ */
+int
+dir2_sf_entry_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_sf_entry_t     *entry;
+       int                     left;
+       xfs_dir2_sf_t           *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
+       entry = XFS_DIR2_SF_FIRSTENTRY(dir);
+       for (left = idx; left > 0; left--)
+               entry = XFS_DIR2_SF_NEXTENTRY(dir, entry);
+       return bitize((int)XFS_DIR2_SF_ENTSIZE_BYENTRY(dir, entry));
+}
+
+/*
+ * Size of the shortform dir2 header located at startoff within obj, as
+ * given by XFS_DIR2_SF_HDR_SIZE() for the header's i8count and passed
+ * through bitize().  idx must be 0.
+ */
+/*ARGSUSED*/
+int
+dir2_sf_hdr_size(
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+       char            *base = obj;
+       xfs_dir2_sf_t   *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       dir = (xfs_dir2_sf_t *)(base + byteize(startoff));
+       return bitize(XFS_DIR2_SF_HDR_SIZE(dir->hdr.i8count));
+}
+
+/*
+ * Number of entries in the shortform dir2 directory located at startoff
+ * within obj, i.e. hdr.count.
+ */
+static int
+dir2_sf_list_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir2_sf_t           *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir2_sf_t *)(base + byteize(startoff));
+       return dir->hdr.count;
+}
+
+/*
+ * Position of entry number idx in the shortform dir2 directory located
+ * at startoff within obj, measured from the start of the directory and
+ * passed through bitize().  The list is walked from the first entry.
+ */
+static int
+dir2_sf_list_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_sf_entry_t     *entry;
+       int                     left;
+       xfs_dir2_sf_t           *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
+       entry = XFS_DIR2_SF_FIRSTENTRY(dir);
+       for (left = idx; left > 0; left--)
+               entry = XFS_DIR2_SF_NEXTENTRY(dir, entry);
+       return bitize((int)((char *)entry - (char *)dir));
+}
+
+/*
+ * Total size of the shortform dir2 directory located at startoff within
+ * obj: step over all hdr.count entries and return the distance from the
+ * start of the directory to the end of the last one, passed through
+ * bitize().  idx must be 0.
+ */
+/*ARGSUSED*/
+int
+dir2sf_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir2_sf_entry_t     *entry;
+       int                     left;
+       xfs_dir2_sf_t           *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       dir = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff));
+       entry = XFS_DIR2_SF_FIRSTENTRY(dir);
+       for (left = dir->hdr.count; left > 0; left--)
+               entry = XFS_DIR2_SF_NEXTENTRY(dir, entry);
+       return bitize((int)((char *)entry - (char *)dir));
+}
diff --git a/db/dir2sf.h b/db/dir2sf.h
new file mode 100644 (file)
index 0000000..f720c8b
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Field tables for the shortform dir2 structures (defined in dir2sf.c). */
+extern const field_t   dir2sf_flds[];
+extern const field_t   dir2_inou_flds[];
+extern const field_t   dir2_sf_hdr_flds[];
+extern const field_t   dir2_sf_entry_flds[];
+
+/*
+ * Size callbacks.  Each takes (object, start offset, index) and returns a
+ * size that has been passed through bitize().
+ */
+extern int     dir2sf_size(void *obj, int startoff, int idx);
+extern int     dir2_inou_size(void *obj, int startoff, int idx);
+extern int     dir2_sf_entry_size(void *obj, int startoff, int idx);
+extern int     dir2_sf_hdr_size(void *obj, int startoff, int idx);
diff --git a/db/dirshort.c b/db/dirshort.c
new file mode 100644 (file)
index 0000000..4a6f4f4
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "bit.h"
+#include "dirshort.h"
+
+/* Count/offset callbacks referenced by the field tables below. */
+static int     dir_sf_entry_name_count(void *obj, int startoff);
+static int     dir_shortform_list_count(void *obj, int startoff);
+static int     dir_shortform_list_offset(void *obj, int startoff, int idx);
+
+/*
+ * Each table below is terminated by a { NULL } entry.  Judging from the
+ * initializers, an entry is: name, field type (FLDT_*), offset (a constant
+ * wrapped in OI() or a callback), count (C1 or a callback), FLD_* flags,
+ * and a TYP_* value -- confirm against the field_t definition in field.h.
+ */
+
+/* Shortform directory: a header followed by a variable-size list. */
+#define        OFF(f)  bitize(offsetof(xfs_dir_shortform_t, f))
+const field_t  dir_shortform_flds[] = {
+       { "hdr", FLDT_DIR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE },
+       { "list", FLDT_DIR_SF_ENTRY, dir_shortform_list_offset,
+         dir_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { NULL }
+};
+
+/* Shortform directory header. */
+#define        HOFF(f) bitize(offsetof(xfs_dir_sf_hdr_t, f))
+const field_t  dir_sf_hdr_flds[] = {
+       { "parent", FLDT_DIR_INO, OI(HOFF(parent)), C1, 0, TYP_INODE },
+       { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/* Shortform directory entry; the name's count comes from namelen. */
+#define        EOFF(f) bitize(offsetof(xfs_dir_sf_entry_t, f))
+const field_t  dir_sf_entry_flds[] = {
+       { "inumber", FLDT_DIR_INO, OI(EOFF(inumber)), C1, 0, TYP_INODE },
+       { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE },
+       { "name", FLDT_CHARNS, OI(EOFF(name)), dir_sf_entry_name_count,
+         FLD_COUNT, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Number of characters in the name of the shortform directory entry
+ * located at startoff within obj, i.e. that entry's namelen.
+ */
+static int
+dir_sf_entry_name_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir_sf_entry_t      *entry;
+
+       ASSERT(bitoffs(startoff) == 0);
+       entry = (xfs_dir_sf_entry_t *)(base + byteize(startoff));
+       return entry->namelen;
+}
+
+/*
+ * Size of entry number idx in the shortform directory located at
+ * startoff within obj.  Entries are variable-sized, so the list is walked
+ * from list[0]; the result is passed through bitize().
+ */
+int
+dir_sf_entry_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir_sf_entry_t      *entry;
+       int                     left;
+       xfs_dir_shortform_t     *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff));
+       entry = dir->list;
+       for (left = idx; left > 0; left--)
+               entry = XFS_DIR_SF_NEXTENTRY(entry);
+       return bitize((int)XFS_DIR_SF_ENTSIZE_BYENTRY(entry));
+}
+
+/*
+ * Number of entries in the shortform directory located at startoff
+ * within obj, i.e. hdr.count.
+ */
+static int
+dir_shortform_list_count(
+       void                    *obj,
+       int                     startoff)
+{
+       char                    *base = obj;
+       xfs_dir_shortform_t     *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir_shortform_t *)(base + byteize(startoff));
+       return dir->hdr.count;
+}
+
+/*
+ * Position of entry number idx in the shortform directory located at
+ * startoff within obj, measured from the start of the directory and
+ * passed through bitize().  The list is walked from list[0].
+ */
+static int
+dir_shortform_list_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir_sf_entry_t      *entry;
+       int                     left;
+       xfs_dir_shortform_t     *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       dir = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff));
+       entry = dir->list;
+       for (left = idx; left > 0; left--)
+               entry = XFS_DIR_SF_NEXTENTRY(entry);
+       return bitize((int)((char *)entry - (char *)dir));
+}
+
+/*
+ * Total size of the shortform directory located at startoff within obj:
+ * step over all hdr.count entries and return the distance from the start
+ * of the directory to the end of the last one, passed through bitize().
+ * idx must be 0.
+ */
+int
+dirshort_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_dir_sf_entry_t      *entry;
+       int                     left;
+       xfs_dir_shortform_t     *dir;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(idx == 0);
+       dir = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff));
+       entry = dir->list;
+       for (left = dir->hdr.count; left > 0; left--)
+               entry = XFS_DIR_SF_NEXTENTRY(entry);
+       return bitize((int)((char *)entry - (char *)dir));
+}
diff --git a/db/dirshort.h b/db/dirshort.h
new file mode 100644 (file)
index 0000000..2d50efb
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Field tables for the shortform directory structures. */
+extern const field_t   dir_sf_entry_flds[];
+extern const field_t   dir_sf_hdr_flds[];
+extern const field_t   dir_shortform_flds[];
+/*
+ * NOTE(review): dirshort.c defines the three tables above but contains no
+ * definition of dirshort_hfld -- confirm it is defined elsewhere, or drop
+ * this declaration.
+ */
+extern const field_t   dirshort_hfld[];
+
+/*
+ * Size callbacks.  Both take (object, start offset, index) and return a
+ * size that has been passed through bitize().
+ */
+extern int     dir_sf_entry_size(void *obj, int startoff, int idx);
+extern int     dirshort_size(void *obj, int startoff, int idx);
diff --git a/db/dquot.c b/db/dquot.c
new file mode 100644 (file)
index 0000000..be22d81
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "bit.h"
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "dquot.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "mount.h"
+#include "output.h"
+
+static int     dquot_f(int argc, char **argv);
+static void    dquot_help(void);
+
+/*
+ * Command table entry for "dquot", registered by dquot_init().
+ *
+ * The usage string mirrors what dquot_f() actually parses: an optional
+ * -p (project) or -u (user, the default) flag followed by exactly one
+ * numeric id.  The words "project" and "user" are not accepted as
+ * arguments.
+ */
+static const cmdinfo_t dquot_cmd =
+       { "dquot", NULL, dquot_f, 1, 2, 1, "[-p|-u] id",
+         "set current address to project or user quota block", dquot_help };
+
+/*
+ * Each table below is terminated by a { NULL } entry.  Judging from the
+ * initializers, an entry is: name, field type (FLDT_*), offset (wrapped
+ * in OI()), count (C1 or CI()), FLD_* flags, and a TYP_* value -- confirm
+ * against the field_t definition in field.h.
+ */
+
+/* Single unnamed entry of type FLDT_DQBLK at offset 0. */
+const field_t  dqblk_hfld[] = {
+       { "", FLDT_DQBLK, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * xfs_dqblk_t: the on-disk dquot followed by a fill area.  The DDOFF and
+ * DDSZC macros prepend the dd_ member prefix to the field name.
+ */
+#define        DDOFF(f)        bitize(offsetof(xfs_dqblk_t, dd_ ## f))
+#define        DDSZC(f)        szcount(xfs_dqblk_t, dd_ ## f)
+const field_t  dqblk_flds[] = {
+       { "diskdq", FLDT_DISK_DQUOT, OI(DDOFF(diskdq)), C1, 0, TYP_NONE },
+       { "fill", FLDT_CHARS, OI(DDOFF(fill)), CI(DDSZC(fill)), FLD_SKIPALL,
+         TYP_NONE },
+       { NULL }
+};
+
+/*
+ * xfs_disk_dquot_t.  The DOFF macro prepends the d_ member prefix to the
+ * field name.  The two pad members carry FLD_SKIPALL.
+ */
+#define        DOFF(f)         bitize(offsetof(xfs_disk_dquot_t, d_ ## f))
+const field_t  disk_dquot_flds[] = {
+       { "magic", FLDT_UINT16X, OI(DOFF(magic)), C1, 0, TYP_NONE },
+       { "version", FLDT_UINT8X, OI(DOFF(version)), C1, 0, TYP_NONE },
+       { "flags", FLDT_UINT8X, OI(DOFF(flags)), C1, 0, TYP_NONE },
+       { "id", FLDT_DQID, OI(DOFF(id)), C1, 0, TYP_NONE },
+       { "blk_hardlimit", FLDT_QCNT, OI(DOFF(blk_hardlimit)), C1, 0,
+         TYP_NONE },
+       { "blk_softlimit", FLDT_QCNT, OI(DOFF(blk_softlimit)), C1, 0,
+         TYP_NONE },
+       { "ino_hardlimit", FLDT_QCNT, OI(DOFF(ino_hardlimit)), C1, 0,
+         TYP_NONE },
+       { "ino_softlimit", FLDT_QCNT, OI(DOFF(ino_softlimit)), C1, 0,
+         TYP_NONE },
+       { "bcount", FLDT_QCNT, OI(DOFF(bcount)), C1, 0, TYP_NONE },
+       { "icount", FLDT_QCNT, OI(DOFF(icount)), C1, 0, TYP_NONE },
+       { "itimer", FLDT_INT32D, OI(DOFF(itimer)), C1, 0, TYP_NONE },
+       { "btimer", FLDT_INT32D, OI(DOFF(btimer)), C1, 0, TYP_NONE },
+       { "iwarns", FLDT_QWARNCNT, OI(DOFF(iwarns)), C1, 0, TYP_NONE },
+       { "bwarns", FLDT_QWARNCNT, OI(DOFF(bwarns)), C1, 0, TYP_NONE },
+       { "pad0", FLDT_INT32D, OI(DOFF(pad0)), C1, FLD_SKIPALL, TYP_NONE },
+       { "rtb_hardlimit", FLDT_QCNT, OI(DOFF(rtb_hardlimit)), C1, 0,
+         TYP_NONE },
+       { "rtb_softlimit", FLDT_QCNT, OI(DOFF(rtb_softlimit)), C1, 0,
+         TYP_NONE },
+       { "rtbcount", FLDT_QCNT, OI(DOFF(rtbcount)), C1, 0, TYP_NONE },
+       { "rtbtimer", FLDT_INT32D, OI(DOFF(rtbtimer)), C1, 0, TYP_NONE },
+       { "rtbwarns", FLDT_QWARNCNT, OI(DOFF(rtbwarns)), C1, 0, TYP_NONE },
+       { "pad", FLDT_UINT16X, OI(DOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Help callback registered in dquot_cmd.  The body is empty, so it
+ * prints nothing.
+ */
+static void
+dquot_help(void)
+{
+}
+
+/*
+ * Implementation of the "dquot" command.
+ *
+ * Accepts an optional -p (project) or -u (user) flag -- user is the
+ * default and the last flag given wins -- followed by exactly one numeric
+ * id.  Looks up the block of the matching quota inode that holds the
+ * record for that id and makes that record the current object.
+ *
+ * Every path returns 0; failures are reported only through dbprintf().
+ */
+static int
+dquot_f(
+       int             argc,
+       char            **argv)
+{
+       bmap_ext_t      bm;
+       int             c;
+       int             doproj;
+       xfs_dqid_t      id;
+       xfs_ino_t       ino;
+       int             nex;
+       char            *p;
+       int             perblock;
+       xfs_fileoff_t   qbno;
+       int             qoff;
+       char            *s;
+
+       /* optind is reset so getopt rescans this command's own argv */
+       doproj = optind = 0;
+       while ((c = getopt(argc, argv, "pu")) != EOF) {
+               switch (c) {
+               case 'p':
+                       doproj = 1;
+                       break;
+               case 'u':
+                       doproj = 0;
+                       break;
+               default:
+                       dbprintf("bad option for dquot command\n");
+                       return 0;
+               }
+       }
+       /* s names the quota type in the messages below */
+       s = doproj ? "project" : "user";
+       /* exactly one non-option argument (the id) must remain */
+       if (optind != argc - 1) {
+               dbprintf("dquot command requires one %s id argument\n", s);
+               return 0;
+       }
+       /* quota inode number comes from the superblock */
+       ino = doproj ? mp->m_sb.sb_pquotino : mp->m_sb.sb_uquotino;
+       if (ino == 0 || ino == NULLFSINO) {
+               dbprintf("no %s quota inode present\n", s);
+               return 0;
+       }
+       /* base 0: strtol picks decimal, octal or hex from the prefix */
+       id = (xfs_dqid_t)strtol(argv[optind], &p, 0);
+       /* reject trailing non-numeric characters */
+       if (*p != '\0') {
+               dbprintf("bad %s id for dquot %s\n", s, argv[optind]);
+               return 0;
+       }
+       /*
+        * perblock: xfs_dqblk_t records per filesystem block.
+        * qbno: block index within the quota file holding this id.
+        * qoff: record index within that block.
+        */
+       perblock = (int)(mp->m_sb.sb_blocksize / sizeof(xfs_dqblk_t));
+       qbno = (xfs_fileoff_t)(id / perblock);
+       qoff = (int)(id % perblock);
+       /*
+        * Presumably: temporarily make the quota inode current so bmap()
+        * can map file block qbno, then restore the previous position --
+        * confirm against push_cur()/pop_cur() in io.c.
+        */
+       push_cur();
+       set_cur_inode(ino);
+       nex = 1;
+       bmap(qbno, 1, XFS_DATA_FORK, &nex, &bm);
+       pop_cur();
+       /* nex left at 0 means bmap() returned no extent for qbno */
+       if (nex == 0) {
+               dbprintf("no %s quota data for id %d\n", s, id);
+               return 0;
+       }
+       /* position on the mapped block, then on the record within it */
+       set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bm.startblock), blkbb,
+               DB_RING_IGN, NULL);
+       off_cur(qoff * (int)sizeof(xfs_dqblk_t), sizeof(xfs_dqblk_t));
+       ring_add();
+       return 0;
+}
+
+/* Register the "dquot" command (dquot_cmd) via add_command(). */
+void
+dquot_init(void)
+{
+       add_command(&dquot_cmd);
+}
diff --git a/db/dquot.h b/db/dquot.h
new file mode 100644 (file)
index 0000000..ce23197
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration so this header does not require field.h. */
+struct field;
+
+/* Field tables for the quota structures (defined in dquot.c). */
+extern const struct field      disk_dquot_flds[];
+extern const struct field      dqblk_flds[];
+extern const struct field      dqblk_hfld[];
+
+/* Registers the "dquot" command. */
+extern void    dquot_init(void);
diff --git a/db/echo.c b/db/echo.c
new file mode 100644 (file)
index 0000000..7027870
--- /dev/null
+++ b/db/echo.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "echo.h"
+#include "output.h"
+
+static int     echo_f(int argc, char **argv);
+
+/*
+ * Descriptor tying the name "echo" to the handler echo_f.
+ * The strings "[args]..." and "echo arguments" are presumably the usage
+ * synopsis and the one-line help text; the meaning of the remaining
+ * positional initializers (NULL, 0, -1, 0 and the trailing NULL) depends
+ * on the layout of cmdinfo_t in command.h -- confirm there.
+ */
+static const cmdinfo_t echo_cmd =
+       { "echo", NULL, echo_f, 0, -1, 0, "[args]...",
+         "echo arguments", NULL };
+
+/*
+ * Handler for the "echo" command.
+ *
+ * Prints every argument after argv[0] through dbprintf(), each followed by
+ * a single space, then a newline.  The walk stops at the first NULL entry
+ * of argv; argc is not consulted.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+echo_f(
+       int     argc,
+       char    **argv)
+{
+       char    **argp = argv + 1;
+
+       while (*argp != NULL) {
+               dbprintf("%s ", *argp);
+               argp++;
+       }
+       dbprintf("\n");
+       return 0;
+}
+
+/*
+ * Hand echo_cmd (defined above, handler echo_f) to add_command().
+ */
+void
+echo_init(void)
+{
+       add_command(&echo_cmd);
+}
diff --git a/db/echo.h b/db/echo.h
new file mode 100644 (file)
index 0000000..a2ddeb6
--- /dev/null
+++ b/db/echo.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Defined in echo.c, where it passes echo_cmd to add_command(). */
+extern void    echo_init(void);
diff --git a/db/faddr.c b/db/faddr.c
new file mode 100644 (file)
index 0000000..ee58936
--- /dev/null
+++ b/db/faddr.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "inode.h"
+#include "io.h"
+#include "bit.h"
+#include "bmap.h"
+#include "output.h"
+#include "mount.h"
+
+/*
+ * Follow an AG-relative block number field.
+ *
+ * Reads an unsigned value the size of xfs_agblock_t at bit offset "bit" of
+ * "obj".  If there is a current allocation group (cur_agno) and the value
+ * is not NULLAGBLOCK, the block is converted with XFS_AGB_TO_DADDR() and
+ * passed to set_cur() with type "next", length blkbb and DB_RING_ADD.
+ * Otherwise a message is printed and set_cur() is not called.
+ */
+void
+fa_agblock(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_agblock_t   agbno;
+
+       /* The conversion below needs cur_agno, so check it first. */
+       if (cur_agno == NULLAGNUMBER) {
+               dbprintf("no current allocation group, cannot set new addr\n");
+               return;
+       }
+
+       agbno = (xfs_agblock_t)getbitval(obj, bit, bitsz(agbno), BVUNSIGNED);
+       if (agbno == NULLAGBLOCK) {
+               dbprintf("null block number, cannot set new addr\n");
+               return;
+       }
+
+       ASSERT(typtab[next].typnm == next);
+       set_cur(&typtab[next],
+               XFS_AGB_TO_DADDR(mp, cur_agno, agbno),
+               blkbb,
+               DB_RING_ADD,
+               NULL);
+}
+
+/*
+ * Follow an AG-relative inode number field.
+ *
+ * Reads an unsigned value the size of xfs_agino_t at bit offset "bit" of
+ * "obj", combines it with cur_agno through XFS_AGINO_TO_INO() and passes
+ * the result to set_cur_inode().  Prints a message instead when there is
+ * no current allocation group or the value is NULLAGINO.  "next" is not
+ * used.
+ */
+/*ARGSUSED*/
+void
+fa_agino(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_agino_t     aginum;
+
+       if (cur_agno != NULLAGNUMBER) {
+               aginum = (xfs_agino_t)getbitval(obj, bit, bitsz(aginum),
+                                               BVUNSIGNED);
+               if (aginum != NULLAGINO) {
+                       set_cur_inode(XFS_AGINO_TO_INO(mp, cur_agno, aginum));
+               } else {
+                       dbprintf("null inode number, cannot set new addr\n");
+               }
+       } else {
+               dbprintf("no current allocation group, cannot set new addr\n");
+       }
+}
+
+/*
+ * Follow a 32-bit attribute block number field.
+ *
+ * A value of 0 is treated as null.  Otherwise the block is looked up with
+ * bmap() in XFS_ATTR_FORK (one block, nex preset to 1); if bmap() leaves
+ * nex at 0 the block is reported as unmapped.  On success the matching
+ * filesystem block is converted with XFS_FSB_TO_DADDR() and passed to
+ * set_cur() with type "next", length blkbb and DB_RING_ADD.
+ */
+/*ARGSUSED*/
+void
+fa_attrblock(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bmap_ext_t      ext;
+       __uint32_t      ablk;
+       xfs_dfsbno_t    fsb;
+       int             nmapped = 1;
+
+       ablk = (__uint32_t)getbitval(obj, bit, bitsz(ablk), BVUNSIGNED);
+       if (ablk == 0) {
+               dbprintf("null attribute block number, cannot set new addr\n");
+               return;
+       }
+
+       bmap(ablk, 1, XFS_ATTR_FORK, &nmapped, &ext);
+       if (nmapped == 0) {
+               dbprintf("attribute block is unmapped\n");
+               return;
+       }
+
+       /* Offset of ablk inside the extent, applied to the extent's start. */
+       fsb = ext.startblock + (ablk - ext.startoff);
+       ASSERT(typtab[next].typnm == next);
+       set_cur(&typtab[next],
+               (__int64_t)XFS_FSB_TO_DADDR(mp, fsb),
+               blkbb,
+               DB_RING_ADD,
+               NULL);
+}
+
+/*
+ * Follow an attribute-fork file offset stored as an unsigned
+ * BMBT_STARTOFF_BITLEN-bit field.
+ *
+ * The offset is looked up with bmap() in XFS_ATTR_FORK (one block, nex
+ * preset to 1).  If it is NULLDFILOFF or bmap() leaves nex at 0, a message
+ * is printed and set_cur() is not called; otherwise the matching
+ * filesystem block is passed to set_cur() with type "next".
+ */
+void
+fa_cfileoffa(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bmap_ext_t      ext;
+       xfs_dfiloff_t   fileoff;
+       xfs_dfsbno_t    fsb;
+       int             nmapped = 1;
+
+       fileoff = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN,
+                                          BVUNSIGNED);
+       if (fileoff == NULLDFILOFF) {
+               dbprintf("null block number, cannot set new addr\n");
+               return;
+       }
+
+       bmap(fileoff, 1, XFS_ATTR_FORK, &nmapped, &ext);
+       if (nmapped == 0) {
+               dbprintf("file block is unmapped\n");
+               return;
+       }
+
+       /* Offset of fileoff inside the extent, applied to its start block. */
+       fsb = ext.startblock + (fileoff - ext.startoff);
+       ASSERT(typtab[next].typnm == next);
+       set_cur(&typtab[next],
+               XFS_FSB_TO_DADDR(mp, fsb),
+               blkbb,
+               DB_RING_ADD,
+               NULL);
+}
+
+/*
+ * Follow a data-fork file offset stored as an unsigned
+ * BMBT_STARTOFF_BITLEN-bit field.
+ *
+ * obj, bit  locate the field.
+ * next      type of the target; TYP_DIR2 targets cover mp->m_dirblkfsbs
+ *           filesystem blocks, every other type covers one.
+ *
+ * The range is looked up with bmap() in XFS_DATA_FORK.  When bmap() reports
+ * more than one extent, a bbmap built by make_bbmap() is passed to
+ * set_cur() instead of NULL.  A message is printed and set_cur() is not
+ * called when the offset is NULLDFILOFF, the extent array cannot be
+ * allocated, or the range is unmapped.
+ */
+void
+fa_cfileoffd(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bbmap_t         bbmap;
+       bmap_ext_t      *bmp;
+       xfs_dfiloff_t   bno;
+       xfs_dfsbno_t    dfsbno;
+       int             nb;
+       int             nex;
+
+       bno = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN,
+               BVUNSIGNED);
+       if (bno == NULLDFILOFF) {
+               dbprintf("null block number, cannot set new addr\n");
+               return;
+       }
+       /*
+        * nex presumably enters bmap() as the capacity of bmp[] and comes
+        * back as the number of extents filled in -- confirm in bmap.c.
+        */
+       nex = nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+       bmp = malloc(nb * sizeof(*bmp));
+       if (bmp == NULL) {
+               /* bmp is dereferenced below; never continue without it. */
+               dbprintf("out of memory, cannot set new addr\n");
+               return;
+       }
+       bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+       if (nex == 0) {
+               dbprintf("file block is unmapped\n");
+               free(bmp);
+               return;
+       }
+       /* Offset of bno inside the first extent, applied to its start. */
+       dfsbno = bmp->startblock + (bno - bmp->startoff);
+       ASSERT(typtab[next].typnm == next);
+       if (nex > 1)
+               make_bbmap(&bbmap, nex, bmp);
+       set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), nb * blkbb,
+               DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+       free(bmp);
+}
+
+/*
+ * Follow a filesystem block number stored as an unsigned
+ * BMBT_STARTBLOCK_BITLEN-bit field.
+ *
+ * Unless the value is NULLDFSBNO, it is converted with XFS_FSB_TO_DADDR()
+ * and passed to set_cur() with type "next", length blkbb and DB_RING_ADD.
+ */
+void
+fa_cfsblock(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_dfsbno_t    fsbno;
+
+       fsbno = (xfs_dfsbno_t)getbitval(obj, bit, BMBT_STARTBLOCK_BITLEN,
+                                       BVUNSIGNED);
+       if (fsbno != NULLDFSBNO) {
+               ASSERT(typtab[next].typnm == next);
+               set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb,
+                       DB_RING_ADD, NULL);
+       } else {
+               dbprintf("null block number, cannot set new addr\n");
+       }
+}
+
+/*
+ * Follow an attribute-fork file offset stored as a full-width
+ * xfs_dfiloff_t field.
+ *
+ * Same steps as fa_cfileoffa() except for the field width: look the offset
+ * up with bmap() in XFS_ATTR_FORK, then pass the matching filesystem block
+ * to set_cur() with type "next".  A null or unmapped offset only prints a
+ * message.
+ */
+void
+fa_dfiloffa(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bmap_ext_t      ext;
+       xfs_dfiloff_t   fileoff;
+       xfs_dfsbno_t    fsb;
+       int             nmapped = 1;
+
+       fileoff = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(fileoff),
+                                          BVUNSIGNED);
+       if (fileoff == NULLDFILOFF) {
+               dbprintf("null block number, cannot set new addr\n");
+               return;
+       }
+
+       bmap(fileoff, 1, XFS_ATTR_FORK, &nmapped, &ext);
+       if (nmapped == 0) {
+               dbprintf("file block is unmapped\n");
+               return;
+       }
+
+       /* Offset of fileoff inside the extent, applied to its start block. */
+       fsb = ext.startblock + (fileoff - ext.startoff);
+       ASSERT(typtab[next].typnm == next);
+       set_cur(&typtab[next],
+               XFS_FSB_TO_DADDR(mp, fsb),
+               blkbb,
+               DB_RING_ADD,
+               NULL);
+}
+
+/*
+ * Follow a data-fork file offset stored as a full-width xfs_dfiloff_t
+ * field.
+ *
+ * obj, bit  locate the field.
+ * next      type of the target; TYP_DIR2 targets cover mp->m_dirblkfsbs
+ *           filesystem blocks, every other type covers one.
+ *
+ * The range is looked up with bmap() in XFS_DATA_FORK.  When bmap() reports
+ * more than one extent, a bbmap built by make_bbmap() is passed to
+ * set_cur() instead of NULL.  A message is printed and set_cur() is not
+ * called when the offset is NULLDFILOFF, the extent array cannot be
+ * allocated, or the range is unmapped.
+ */
+void
+fa_dfiloffd(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bbmap_t         bbmap;
+       bmap_ext_t      *bmp;
+       xfs_dfiloff_t   bno;
+       xfs_dfsbno_t    dfsbno;
+       int             nb;
+       int             nex;
+
+       bno = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED);
+       if (bno == NULLDFILOFF) {
+               dbprintf("null block number, cannot set new addr\n");
+               return;
+       }
+       /*
+        * nex presumably enters bmap() as the capacity of bmp[] and comes
+        * back as the number of extents filled in -- confirm in bmap.c.
+        */
+       nex = nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1;
+       bmp = malloc(nb * sizeof(*bmp));
+       if (bmp == NULL) {
+               /* bmp is dereferenced below; never continue without it. */
+               dbprintf("out of memory, cannot set new addr\n");
+               return;
+       }
+       bmap(bno, nb, XFS_DATA_FORK, &nex, bmp);
+       if (nex == 0) {
+               dbprintf("file block is unmapped\n");
+               free(bmp);
+               return;
+       }
+       /* Offset of bno inside the first extent, applied to its start. */
+       dfsbno = bmp->startblock + (bno - bmp->startoff);
+       ASSERT(typtab[next].typnm == next);
+       if (nex > 1)
+               make_bbmap(&bbmap, nex, bmp);
+       set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), nb * blkbb,
+               DB_RING_ADD, nex > 1 ? &bbmap : NULL);
+       free(bmp);
+}
+
+/*
+ * Follow a full-width xfs_dfsbno_t filesystem block number field.
+ *
+ * Unless the value is NULLDFSBNO, it is converted with XFS_FSB_TO_DADDR()
+ * and passed to set_cur() with type "next", length blkbb and DB_RING_ADD.
+ */
+void
+fa_dfsbno(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_dfsbno_t    fsbno;
+
+       fsbno = (xfs_dfsbno_t)getbitval(obj, bit, bitsz(fsbno), BVUNSIGNED);
+       if (fsbno != NULLDFSBNO) {
+               ASSERT(typtab[next].typnm == next);
+               set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, fsbno), blkbb,
+                       DB_RING_ADD, NULL);
+       } else {
+               dbprintf("null block number, cannot set new addr\n");
+       }
+}
+
+/*
+ * Follow a 32-bit directory block number field.
+ *
+ * A value of 0 is treated as null.  Otherwise mp->m_dirblkfsbs blocks
+ * starting at that offset are looked up with bmap() in XFS_DATA_FORK.
+ * When bmap() reports more than one extent, a bbmap built by make_bbmap()
+ * is passed to set_cur() instead of NULL.  A message is printed and
+ * set_cur() is not called when the number is 0, the extent array cannot
+ * be allocated, or the block is unmapped.
+ *
+ * NOTE(review): the length passed to set_cur() is computed with
+ * XFS_FSB_TO_DADDR() applied to a block count, whereas fa_cfileoffd() and
+ * fa_dfiloffd() use nb * blkbb -- confirm the two are meant to agree.
+ */
+/*ARGSUSED*/
+void
+fa_dirblock(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       bbmap_t         bbmap;
+       bmap_ext_t      *bmp;
+       __uint32_t      bno;
+       xfs_dfsbno_t    dfsbno;
+       int             nex;
+
+       bno = (__uint32_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED);
+       if (bno == 0) {
+               dbprintf("null directory block number, cannot set new addr\n");
+               return;
+       }
+       nex = mp->m_dirblkfsbs;
+       bmp = malloc(nex * sizeof(*bmp));
+       if (bmp == NULL) {
+               /* bmp is dereferenced below; never continue without it. */
+               dbprintf("out of memory, cannot set new addr\n");
+               return;
+       }
+       bmap(bno, mp->m_dirblkfsbs, XFS_DATA_FORK, &nex, bmp);
+       if (nex == 0) {
+               dbprintf("directory block is unmapped\n");
+               free(bmp);
+               return;
+       }
+       /* Offset of bno inside the first extent, applied to its start. */
+       dfsbno = bmp->startblock + (bno - bmp->startoff);
+       ASSERT(typtab[next].typnm == next);
+       if (nex > 1)
+               make_bbmap(&bbmap, nex, bmp);
+       set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno),
+               (int)XFS_FSB_TO_DADDR(mp, mp->m_dirblkfsbs), DB_RING_ADD,
+               nex > 1 ? &bbmap : NULL);
+       free(bmp);
+}
+
+/*
+ * Follow a full-width xfs_drfsbno_t block number field.
+ *
+ * Unless the value is NULLDRFSBNO, it is converted with XFS_FSB_TO_BB()
+ * (not XFS_FSB_TO_DADDR(), unlike fa_dfsbno()) and passed to set_cur()
+ * with type "next", length blkbb and DB_RING_ADD.
+ */
+void
+fa_drfsbno(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_drfsbno_t   rfsbno;
+
+       rfsbno = (xfs_drfsbno_t)getbitval(obj, bit, bitsz(rfsbno), BVUNSIGNED);
+       if (rfsbno != NULLDRFSBNO) {
+               ASSERT(typtab[next].typnm == next);
+               set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_BB(mp, rfsbno),
+                       blkbb, DB_RING_ADD, NULL);
+       } else {
+               dbprintf("null block number, cannot set new addr\n");
+       }
+}
+
+/*
+ * Validate a full-width xfs_drtbno_t block number field.
+ *
+ * Only the null check is implemented: NULLDRTBNO prints a message, and any
+ * other value is accepted without changing the current location.  "next"
+ * is not used.
+ */
+/*ARGSUSED*/
+void
+fa_drtbno(
+       void    *obj,
+       int     bit,
+       typnm_t next)
+{
+       xfs_drtbno_t    rtbno;
+
+       rtbno = (xfs_drtbno_t)getbitval(obj, bit, bitsz(rtbno), BVUNSIGNED);
+       if (rtbno == NULLDRTBNO) {
+               dbprintf("null block number, cannot set new addr\n");
+       }
+       /* need set_cur to understand rt subvolume */
+}
+
+/*
+ * Follow a full-width xfs_ino_t inode number field.
+ *
+ * Asserts that "next" is TYP_INODE, then passes the value to
+ * set_cur_inode() unless it is NULLFSINO, in which case a message is
+ * printed instead.
+ */
+/*ARGSUSED*/
+void
+fa_ino(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_ino_t       inum;
+
+       ASSERT(next == TYP_INODE);
+       inum = (xfs_ino_t)getbitval(obj, bit, bitsz(inum), BVUNSIGNED);
+       if (inum != NULLFSINO) {
+               set_cur_inode(inum);
+               return;
+       }
+       dbprintf("null inode number, cannot set new addr\n");
+}
+
+/*
+ * Follow an inode number stored in an xfs_dir2_ino4_t-sized field.
+ *
+ * The field is read unsigned and widened to xfs_ino_t.  Asserts that
+ * "next" is TYP_INODE, then passes the value to set_cur_inode() unless it
+ * equals NULLFSINO.
+ * NOTE(review): the narrow value is compared against the xfs_ino_t null
+ * constant after widening -- confirm that check can ever match.
+ */
+void
+fa_ino4(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_ino_t       inum;
+
+       ASSERT(next == TYP_INODE);
+       /* Width comes from the on-disk type, not from inum. */
+       inum = (xfs_ino_t)getbitval(obj, bit, bitsz(xfs_dir2_ino4_t),
+                                   BVUNSIGNED);
+       if (inum != NULLFSINO) {
+               set_cur_inode(inum);
+               return;
+       }
+       dbprintf("null inode number, cannot set new addr\n");
+}
+
+/*
+ * Follow an inode number stored in an xfs_dir2_ino8_t-sized field.
+ *
+ * The field is read unsigned into an xfs_ino_t.  Asserts that "next" is
+ * TYP_INODE, then passes the value to set_cur_inode() unless it equals
+ * NULLFSINO, in which case a message is printed instead.
+ */
+void
+fa_ino8(
+       void            *obj,
+       int             bit,
+       typnm_t         next)
+{
+       xfs_ino_t       inum;
+
+       ASSERT(next == TYP_INODE);
+       /* Width comes from the on-disk type, not from inum. */
+       inum = (xfs_ino_t)getbitval(obj, bit, bitsz(xfs_dir2_ino8_t),
+                                   BVUNSIGNED);
+       if (inum != NULLFSINO) {
+               set_cur_inode(inum);
+               return;
+       }
+       dbprintf("null inode number, cannot set new addr\n");
+}
diff --git a/db/faddr.h b/db/faddr.h
new file mode 100644 (file)
index 0000000..25c471e
--- /dev/null
+++ b/db/faddr.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Signature shared by the fa_*() field-address callbacks: "obj" is the
+ * buffer holding the field, "bit" its bit offset within that buffer, and
+ * "next" the type to treat the target as.  field.c's ftattrtab stores one
+ * of these callbacks, or NULL, per field type (presumably in a member
+ * typed adfnc_t -- confirm in field.h).
+ *
+ * This header uses typnm_t without declaring it; faddr.c and field.c
+ * include "type.h" before "faddr.h".
+ */
+typedef void (*adfnc_t)(void *obj, int bit, typnm_t next);
+
+/*
+ * Callbacks implemented in faddr.c.
+ * NOTE(review): fa_dinode_union is declared below but faddr.c contains no
+ * definition of it -- confirm it is defined elsewhere or drop the
+ * declaration.
+ */
+extern void    fa_agblock(void *obj, int bit, typnm_t next);
+extern void    fa_agino(void *obj, int bit, typnm_t next);
+extern void    fa_attrblock(void *obj, int bit, typnm_t next);
+extern void    fa_cfileoffd(void *obj, int bit, typnm_t next);
+extern void    fa_cfsblock(void *obj, int bit, typnm_t next);
+extern void    fa_dfiloffd(void *obj, int bit, typnm_t next);
+extern void    fa_dfsbno(void *obj, int bit, typnm_t next);
+extern void    fa_dinode_union(void *obj, int bit, typnm_t next);
+extern void    fa_dirblock(void *obj, int bit, typnm_t next);
+extern void    fa_drfsbno(void *obj, int bit, typnm_t next);
+extern void    fa_drtbno(void *obj, int bit, typnm_t next);
+extern void    fa_ino(void *obj, int bit, typnm_t next);
+extern void    fa_cfileoffa(void *obj, int bit, typnm_t next);
+extern void    fa_dfiloffa(void *obj, int bit, typnm_t next);
+extern void    fa_ino4(void *obj, int bit, typnm_t next);
+extern void    fa_ino8(void *obj, int bit, typnm_t next);
diff --git a/db/field.c b/db/field.c
new file mode 100644 (file)
index 0000000..399c472
--- /dev/null
+++ b/db/field.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "bnobt.h"
+#include "cntbt.h"
+#include "inobt.h"
+#include "bmapbt.h"
+#include "bmroot.h"
+#include "bit.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "sb.h"
+#include "dir.h"
+#include "dirshort.h"
+#include "attr.h"
+#include "attrshort.h"
+#include "dquot.h"
+#include "dir2.h"
+#include "dir2sf.h"
+
+const ftattr_t ftattrtab[] = {
+       { FLDT_AEXTNUM, "aextnum", fp_num, "%d", SI(bitsz(xfs_aextnum_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_AGBLOCK, "agblock", fp_num, "%u", SI(bitsz(xfs_agblock_t)),
+         FTARG_DONULL, fa_agblock, NULL },
+       { FLDT_AGBLOCKNZ, "agblocknz", fp_num, "%u", SI(bitsz(xfs_agblock_t)),
+         FTARG_SKIPZERO|FTARG_DONULL, fa_agblock, NULL },
+       { FLDT_AGF, "agf", NULL, (char *)agf_flds, agf_size, FTARG_SIZE, NULL,
+         agf_flds },
+       { FLDT_AGFL, "agfl", NULL, (char *)agfl_flds, agfl_size, FTARG_SIZE,
+         NULL, agfl_flds },
+       { FLDT_AGI, "agi", NULL, (char *)agi_flds, agi_size, FTARG_SIZE, NULL,
+         agi_flds },
+       { FLDT_AGINO, "agino", fp_num, "%u", SI(bitsz(xfs_agino_t)),
+         FTARG_DONULL, fa_agino, NULL },
+       { FLDT_AGINONN, "aginonn", fp_num, "%u", SI(bitsz(xfs_agino_t)),
+         FTARG_SKIPNULL, fa_agino, NULL },
+       { FLDT_AGNUMBER, "agnumber", fp_num, "%u", SI(bitsz(xfs_agnumber_t)),
+         FTARG_DONULL, NULL, NULL },
+       { FLDT_ATTR, "attr", NULL, (char *)attr_flds, attr_size, FTARG_SIZE,
+         NULL, attr_flds },
+       { FLDT_ATTR_BLKINFO, "attr_blkinfo", NULL, (char *)attr_blkinfo_flds,
+         SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, attr_blkinfo_flds },
+       { FLDT_ATTR_LEAF_ENTRY, "attr_leaf_entry", fp_sarray,
+         (char *)attr_leaf_entry_flds, SI(bitsz(struct xfs_attr_leaf_entry)),
+         0, NULL, attr_leaf_entry_flds },
+       { FLDT_ATTR_LEAF_HDR, "attr_leaf_hdr", NULL, (char *)attr_leaf_hdr_flds,
+         SI(bitsz(struct xfs_attr_leaf_hdr)), 0, NULL, attr_leaf_hdr_flds },
+       { FLDT_ATTR_LEAF_MAP, "attr_leaf_map", fp_sarray,
+         (char *)attr_leaf_map_flds, SI(bitsz(struct xfs_attr_leaf_map)), 0,
+         NULL, attr_leaf_map_flds },
+       { FLDT_ATTR_LEAF_NAME, "attr_leaf_name", NULL,
+         (char *)attr_leaf_name_flds, attr_leaf_name_size, FTARG_SIZE, NULL,
+         attr_leaf_name_flds },
+       { FLDT_ATTR_NODE_ENTRY, "attr_node_entry", fp_sarray,
+         (char *)attr_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0,
+         NULL, attr_node_entry_flds },
+       { FLDT_ATTR_NODE_HDR, "attr_node_hdr", NULL, (char *)attr_node_hdr_flds,
+         SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, attr_node_hdr_flds },
+       { FLDT_ATTR_SF_ENTRY, "attr_sf_entry", NULL, (char *)attr_sf_entry_flds,
+         attr_sf_entry_size, FTARG_SIZE, NULL, attr_sf_entry_flds },
+       { FLDT_ATTR_SF_HDR, "attr_sf_hdr", NULL, (char *)attr_sf_hdr_flds,
+         SI(bitsz(struct xfs_attr_sf_hdr)), 0, NULL, attr_sf_hdr_flds },
+       { FLDT_ATTRBLOCK, "attrblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0,
+         fa_attrblock, NULL },
+       { FLDT_ATTRSHORT, "attrshort", NULL, (char *)attr_shortform_flds,
+         attrshort_size, FTARG_SIZE, NULL, attr_shortform_flds },
+       { FLDT_BMAPBTA, "bmapbta", NULL, (char *)bmapbta_flds, bmapbta_size,
+         FTARG_SIZE, NULL, bmapbta_flds },
+       { FLDT_BMAPBTAKEY, "bmapbtakey", fp_sarray, (char *)bmapbta_key_flds,
+         SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbta_key_flds },
+       { FLDT_BMAPBTAPTR, "bmapbtaptr", fp_num, "%llu",
+         SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL },
+       { FLDT_BMAPBTAREC, "bmapbtarec", fp_sarray, (char *)bmapbta_rec_flds,
+         SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbta_rec_flds },
+       { FLDT_BMAPBTD, "bmapbtd", NULL, (char *)bmapbtd_flds, bmapbtd_size,
+         FTARG_SIZE, NULL, bmapbtd_flds },
+       { FLDT_BMAPBTDKEY, "bmapbtdkey", fp_sarray, (char *)bmapbtd_key_flds,
+         SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbtd_key_flds },
+       { FLDT_BMAPBTDPTR, "bmapbtdptr", fp_num, "%llu",
+         SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL },
+       { FLDT_BMAPBTDREC, "bmapbtdrec", fp_sarray, (char *)bmapbtd_rec_flds,
+         SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbtd_rec_flds },
+       { FLDT_BMROOTA, "bmroota", NULL, (char *)bmroota_flds, bmroota_size,
+         FTARG_SIZE, NULL, bmroota_flds },
+       { FLDT_BMROOTAKEY, "bmrootakey", fp_sarray, (char *)bmroota_key_flds,
+         SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmroota_key_flds },
+       { FLDT_BMROOTAPTR, "bmrootaptr", fp_num, "%llu",
+         SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL },
+       { FLDT_BMROOTD, "bmrootd", NULL, (char *)bmrootd_flds, bmrootd_size,
+         FTARG_SIZE, NULL, bmrootd_flds },
+       { FLDT_BMROOTDKEY, "bmrootdkey", fp_sarray, (char *)bmrootd_key_flds,
+         SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmrootd_key_flds },
+       { FLDT_BMROOTDPTR, "bmrootdptr", fp_num, "%llu",
+         SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL },
+       { FLDT_BNOBT, "bnobt", NULL, (char *)bnobt_flds, bnobt_size, FTARG_SIZE,
+         NULL, bnobt_flds },
+       { FLDT_BNOBTKEY, "bnobtkey", fp_sarray, (char *)bnobt_key_flds,
+         SI(bitsz(xfs_alloc_key_t)), 0, NULL, bnobt_key_flds },
+       { FLDT_BNOBTPTR, "bnobtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)),
+         0, fa_agblock, NULL },
+       { FLDT_BNOBTREC, "bnobtrec", fp_sarray, (char *)bnobt_rec_flds,
+         SI(bitsz(xfs_alloc_rec_t)), 0, NULL, bnobt_rec_flds },
+       { FLDT_CEXTFLG, "cextflag", fp_num, "%u", SI(BMBT_EXNTFLAG_BITLEN), 0,
+         NULL, NULL },
+       { FLDT_CEXTLEN, "cextlen", fp_num, "%u", SI(BMBT_BLOCKCOUNT_BITLEN), 0,
+         NULL, NULL },
+       { FLDT_CFILEOFFA, "cfileoffa", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN),
+         0, fa_cfileoffa, NULL },
+       { FLDT_CFILEOFFD, "cfileoffd", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN),
+         0, fa_cfileoffd, NULL },
+       { FLDT_CFSBLOCK, "cfsblock", fp_num, "%llu", SI(BMBT_STARTBLOCK_BITLEN),
+         0, fa_cfsblock, NULL },
+       { FLDT_CHARNS, "charns", fp_charns, NULL, SI(bitsz(char)), 0, NULL,
+         NULL },
+       { FLDT_CHARS, "chars", fp_num, "%c", SI(bitsz(char)), 0, NULL, NULL },
+       { FLDT_CNTBT, "cntbt", NULL, (char *)cntbt_flds, cntbt_size, FTARG_SIZE,
+         NULL, cntbt_flds },
+       { FLDT_CNTBTKEY, "cntbtkey", fp_sarray, (char *)cntbt_key_flds,
+         SI(bitsz(xfs_alloc_key_t)), 0, NULL, cntbt_key_flds },
+       { FLDT_CNTBTPTR, "cntbtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)),
+         0, fa_agblock, NULL },
+       { FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds,
+         SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds },
+       { FLDT_DEV, "dev", fp_num, "%#x", SI(bitsz(xfs_dev_t)), 0, NULL, NULL },
+       { FLDT_DFILOFFA, "dfiloffa", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)),
+         0, fa_dfiloffa, NULL },
+       { FLDT_DFILOFFD, "dfiloffd", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)),
+         0, fa_dfiloffd, NULL },
+       { FLDT_DFSBNO, "dfsbno", fp_num, "%llu", SI(bitsz(xfs_dfsbno_t)),
+         FTARG_DONULL, fa_dfsbno, NULL },
+       { FLDT_DINODE_A, "dinode_a", NULL, (char *)inode_a_flds, inode_a_size,
+         FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_a_flds },
+       { FLDT_DINODE_CORE, "dinode_core", NULL, (char *)inode_core_flds,
+         SI(bitsz(xfs_dinode_core_t)), 0, NULL, inode_core_flds },
+       { FLDT_DINODE_FMT, "dinode_fmt", fp_dinode_fmt, NULL,
+         SI(bitsz(__int8_t)), 0, NULL, NULL },
+       { FLDT_DINODE_U, "dinode_u", NULL, (char *)inode_u_flds, inode_u_size,
+         FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_u_flds },
+       { FLDT_DIR, "dir", NULL, (char *)dir_flds, dir_size, FTARG_SIZE, NULL,
+         dir_flds },
+       { FLDT_DIR2, "dir2", NULL, (char *)dir2_flds, dir2_size, FTARG_SIZE,
+         NULL, dir2_flds },
+       { FLDT_DIR2_BLOCK_TAIL, "dir2_block_tail", NULL,
+         (char *)dir2_block_tail_flds, SI(bitsz(xfs_dir2_block_tail_t)), 0,
+         NULL, dir2_block_tail_flds },
+       { FLDT_DIR2_DATA_FREE, "dir2_data_free", NULL,
+         (char *)dir2_data_free_flds, SI(bitsz(xfs_dir2_data_free_t)), 0, NULL,
+         dir2_data_free_flds },
+       { FLDT_DIR2_DATA_HDR, "dir2_data_hdr", NULL, (char *)dir2_data_hdr_flds,
+         SI(bitsz(xfs_dir2_data_hdr_t)), 0, NULL, dir2_data_hdr_flds },
+       { FLDT_DIR2_DATA_OFF, "dir2_data_off", fp_num, "%#x",
+         SI(bitsz(xfs_dir2_data_off_t)), 0, NULL, NULL },
+       { FLDT_DIR2_DATA_OFFNZ, "dir2_data_offnz", fp_num, "%#x",
+         SI(bitsz(xfs_dir2_data_off_t)), FTARG_SKIPZERO, NULL, NULL },
+       { FLDT_DIR2_DATA_UNION, "dir2_data_union", NULL,
+         (char *)dir2_data_union_flds, dir2_data_union_size, FTARG_SIZE, NULL,
+         dir2_data_union_flds },
+       { FLDT_DIR2_FREE_HDR, "dir2_free_hdr", NULL, (char *)dir2_free_hdr_flds,
+         SI(bitsz(xfs_dir2_free_hdr_t)), 0, NULL, dir2_free_hdr_flds },
+       { FLDT_DIR2_INO4, "dir2_ino4", fp_num, "%u", SI(bitsz(xfs_dir2_ino4_t)),
+         0, fa_ino4, NULL },
+       { FLDT_DIR2_INO8, "dir2_ino8", fp_num, "%llu",
+         SI(bitsz(xfs_dir2_ino8_t)), 0, fa_ino8, NULL },
+       { FLDT_DIR2_INOU, "dir2_inou", NULL, (char *)dir2_inou_flds,
+         dir2_inou_size, FTARG_SIZE, NULL, dir2_inou_flds },
+       { FLDT_DIR2_LEAF_ENTRY, "dir2_leaf_entry", NULL,
+         (char *)dir2_leaf_entry_flds, SI(bitsz(xfs_dir2_leaf_entry_t)), 0,
+         NULL, dir2_leaf_entry_flds },
+       { FLDT_DIR2_LEAF_HDR, "dir2_leaf_hdr", NULL, (char *)dir2_leaf_hdr_flds,
+         SI(bitsz(xfs_dir2_leaf_hdr_t)), 0, NULL, dir2_leaf_hdr_flds },
+       { FLDT_DIR2_LEAF_TAIL, "dir2_leaf_tail", NULL,
+         (char *)dir2_leaf_tail_flds, SI(bitsz(xfs_dir2_leaf_tail_t)), 0, NULL,
+         dir2_leaf_tail_flds },
+       { FLDT_DIR2_SF_ENTRY, "dir2_sf_entry", NULL, (char *)dir2_sf_entry_flds,
+         dir2_sf_entry_size, FTARG_SIZE, NULL, dir2_sf_entry_flds },
+       { FLDT_DIR2_SF_HDR, "dir2_sf_hdr", NULL, (char *)dir2_sf_hdr_flds,
+         dir2_sf_hdr_size, FTARG_SIZE, NULL, dir2_sf_hdr_flds },
+       { FLDT_DIR2_SF_OFF, "dir2_sf_off", fp_num, "%#x",
+         SI(bitsz(xfs_dir2_sf_off_t)), 0, NULL, NULL },
+       { FLDT_DIR2SF, "dir2sf", NULL, (char *)dir2sf_flds, dir2sf_size,
+         FTARG_SIZE, NULL, dir2sf_flds },
+       { FLDT_DIR_BLKINFO, "dir_blkinfo", NULL, (char *)dir_blkinfo_flds,
+         SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, dir_blkinfo_flds },
+       { FLDT_DIR_INO, "dir_ino", fp_num, "%llu", SI(bitsz(xfs_dir_ino_t)), 0,
+         fa_ino, NULL },
+       { FLDT_DIR_LEAF_ENTRY, "dir_leaf_entry", fp_sarray,
+         (char *)dir_leaf_entry_flds, SI(bitsz(struct xfs_dir_leaf_entry)), 0,
+         NULL, dir_leaf_entry_flds },
+       { FLDT_DIR_LEAF_HDR, "dir_leaf_hdr", NULL, (char *)dir_leaf_hdr_flds,
+         SI(bitsz(struct xfs_dir_leaf_hdr)), 0, NULL, dir_leaf_hdr_flds },
+       { FLDT_DIR_LEAF_MAP, "dir_leaf_map", fp_sarray,
+         (char *)dir_leaf_map_flds, SI(bitsz(struct xfs_dir_leaf_map)), 0,
+         NULL, dir_leaf_map_flds },
+       { FLDT_DIR_LEAF_NAME, "dir_leaf_name", NULL, (char *)dir_leaf_name_flds,
+         dir_leaf_name_size, FTARG_SIZE, NULL, dir_leaf_name_flds },
+       { FLDT_DIR_NODE_ENTRY, "dir_node_entry", fp_sarray,
+         (char *)dir_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0,
+         NULL, dir_node_entry_flds },
+       { FLDT_DIR_NODE_HDR, "dir_node_hdr", NULL, (char *)dir_node_hdr_flds,
+         SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, dir_node_hdr_flds },
+       { FLDT_DIR_SF_ENTRY, "dir_sf_entry", NULL, (char *)dir_sf_entry_flds,
+         dir_sf_entry_size, FTARG_SIZE, NULL, dir_sf_entry_flds },
+       { FLDT_DIR_SF_HDR, "dir_sf_hdr", NULL, (char *)dir_sf_hdr_flds,
+         SI(bitsz(struct xfs_dir_sf_hdr)), 0, NULL, dir_sf_hdr_flds },
+       { FLDT_DIRBLOCK, "dirblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0,
+         fa_dirblock, NULL },
+       { FLDT_DIRSHORT, "dirshort", NULL, (char *)dir_shortform_flds,
+         dirshort_size, FTARG_SIZE, NULL, dir_shortform_flds },
+       { FLDT_DISK_DQUOT, "disk_dquot", NULL, (char *)disk_dquot_flds,
+         SI(bitsz(xfs_disk_dquot_t)), 0, NULL, disk_dquot_flds },
+       { FLDT_DQBLK, "dqblk", NULL, (char *)dqblk_flds, SI(bitsz(xfs_dqblk_t)),
+         0, NULL, dqblk_flds },
+       { FLDT_DQID, "dqid", fp_num, "%d", SI(bitsz(xfs_dqid_t)), 0, NULL,
+         NULL },
+       { FLDT_DRFSBNO, "drfsbno", fp_num, "%llu", SI(bitsz(xfs_drfsbno_t)),
+         FTARG_DONULL, fa_drfsbno, NULL },
+       { FLDT_DRTBNO, "drtbno", fp_num, "%llu", SI(bitsz(xfs_drtbno_t)),
+         FTARG_DONULL, fa_drtbno, NULL },
+       { FLDT_EXTLEN, "extlen", fp_num, "%u", SI(bitsz(xfs_extlen_t)), 0, NULL,
+         NULL },
+       { FLDT_EXTNUM, "extnum", fp_num, "%d", SI(bitsz(xfs_extnum_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_FSIZE, "fsize", fp_num, "%lld", SI(bitsz(xfs_fsize_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_INO, "ino", fp_num, "%llu", SI(bitsz(xfs_ino_t)), FTARG_DONULL,
+         fa_ino, NULL },
+       { FLDT_INOBT, "inobt",  NULL, (char *)inobt_flds, inobt_size,
+         FTARG_SIZE, NULL, inobt_flds },
+       { FLDT_INOBTKEY, "inobtkey", fp_sarray, (char *)inobt_key_flds,
+         SI(bitsz(xfs_inobt_key_t)), 0, NULL, inobt_key_flds },
+       { FLDT_INOBTPTR, "inobtptr", fp_num, "%u", SI(bitsz(xfs_inobt_ptr_t)),
+         0, fa_agblock, NULL },
+       { FLDT_INOBTREC, "inobtrec", fp_sarray, (char *)inobt_rec_flds,
+         SI(bitsz(xfs_inobt_rec_t)), 0, NULL, inobt_rec_flds },
+       { FLDT_INODE, "inode", NULL, (char *)inode_flds, inode_size, FTARG_SIZE,
+         NULL, inode_flds },
+       { FLDT_INOFREE, "inofree", fp_num, "%#llx", SI(bitsz(xfs_inofree_t)), 0,
+         NULL, NULL },
+       { FLDT_INT16D, "int16d", fp_num, "%d", SI(bitsz(__int16_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_INT32D, "int32d", fp_num, "%d", SI(bitsz(__int32_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_INT64D, "int64d", fp_num, "%lld", SI(bitsz(__int64_t)),
+         FTARG_SIGNED, NULL, NULL },
+       { FLDT_INT8D, "int8d", fp_num, "%d", SI(bitsz(__int8_t)), FTARG_SIGNED,
+         NULL, NULL },
+       { FLDT_NSEC, "nsec", fp_num, "%09d", SI(bitsz(__int32_t)), FTARG_SIGNED,
+         NULL, NULL },
+       { FLDT_QCNT, "qcnt", fp_num, "%llu", SI(bitsz(xfs_qcnt_t)), 0, NULL,
+         NULL },
+       { FLDT_QWARNCNT, "qwarncnt", fp_num, "%u", SI(bitsz(xfs_qwarncnt_t)), 0,
+         NULL, NULL },
+       { FLDT_SB, "sb", NULL, (char *)sb_flds, sb_size, FTARG_SIZE, NULL,
+         sb_flds },
+       { FLDT_TIME, "time", fp_time, NULL, SI(bitsz(__int32_t)), FTARG_SIGNED,
+         NULL, NULL },
+       { FLDT_TIMESTAMP, "timestamp", NULL, (char *)timestamp_flds,
+         SI(bitsz(xfs_timestamp_t)), 0, NULL, timestamp_flds },
+       { FLDT_UINT1, "uint1", fp_num, "%u", SI(1), 0, NULL, NULL },
+       { FLDT_UINT16D, "uint16d", fp_num, "%u", SI(bitsz(__uint16_t)), 0, NULL,
+         NULL },
+       { FLDT_UINT16O, "uint16o", fp_num, "%#o", SI(bitsz(__uint16_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT16X, "uint16x", fp_num, "%#x", SI(bitsz(__uint16_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT32D, "uint32d", fp_num, "%u", SI(bitsz(__uint32_t)), 0, NULL,
+         NULL },
+       { FLDT_UINT32O, "uint32o", fp_num, "%#o", SI(bitsz(__uint32_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT32X, "uint32x", fp_num, "%#x", SI(bitsz(__uint32_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT64D, "uint64d", fp_num, "%llu", SI(bitsz(__uint64_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT64O, "uint64o", fp_num, "%#llo", SI(bitsz(__uint64_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT64X, "uint64x", fp_num, "%#llx", SI(bitsz(__uint64_t)), 0,
+         NULL, NULL },
+       { FLDT_UINT8D, "uint8d", fp_num, "%u", SI(bitsz(__uint8_t)), 0, NULL,
+         NULL },
+       { FLDT_UINT8O, "uint8o", fp_num, "%#o", SI(bitsz(__uint8_t)), 0, NULL,
+         NULL },
+       { FLDT_UINT8X, "uint8x", fp_num, "%#x", SI(bitsz(__uint8_t)), 0, NULL,
+         NULL },
+       { FLDT_UUID, "uuid", fp_uuid, NULL, SI(bitsz(uuid_t)), 0, NULL, NULL },
+       { FLDT_ZZZ, NULL }
+};
+
+int
+bitoffset(
+       const field_t   *f,
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+
+       if (!(f->flags & FLD_OFFSET)) {
+               if (f->flags & FLD_ARRAY) {
+                       int             abase;
+#ifdef DEBUG
+                       const ftattr_t  *fa = &ftattrtab[f->ftyp];
+#endif
+
+                       abase = (f->flags & FLD_ABASE1) != 0;
+                       ASSERT(fa->ftyp == f->ftyp);
+                       ASSERT((fa->arg & FTARG_SIZE) == 0);
+                       return (int)(__psint_t)f->offset +
+                               (idx - abase) * fsize(f, obj, startoff, idx);
+               } else
+                       return (int)(__psint_t)f->offset;
+       } else
+               return (*f->offset)(obj, startoff, idx);
+}
+
+/*
+ * Return the element count of field f: the result of the f->count
+ * callback when FLD_COUNT is set, otherwise the constant stored in the
+ * f->count pointer slot.
+ */
+int
+fcount(
+       const field_t   *f,
+       void            *obj,
+       int             startoff)
+{
+       if (f->flags & FLD_COUNT)
+               return (*f->count)(obj, startoff);
+       return (int)(__psint_t)f->count;
+}
+
+/*
+ * Look up name in the NULL-name-terminated table fields.
+ *
+ * An entry only matches when its name is equal to name and its count
+ * (see fcount) for obj/startoff is non-zero.  Returns the matching entry
+ * or NULL when there is none.
+ */
+const field_t *
+findfield(
+       char            *name,
+       const field_t   *fields,
+       void            *obj,
+       int             startoff)
+{
+       const field_t   *cand = fields;
+
+       while (cand->name != NULL) {
+               /* the count is only evaluated once the name has matched */
+               if (strcmp(cand->name, name) == 0 &&
+                   fcount(cand, obj, startoff) != 0)
+                       return cand;
+               cand++;
+       }
+       return NULL;
+}
+
+/*
+ * Return the size of element idx of field f, taken from the ftattrtab[]
+ * entry for the field's type: the result of the size callback when
+ * FTARG_SIZE is set in the entry's arg, otherwise the constant stored in
+ * the size pointer slot.
+ */
+int
+fsize(
+       const field_t   *f,
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+       const ftattr_t  *attr = &ftattrtab[f->ftyp];
+
+       ASSERT(attr->ftyp == f->ftyp);
+       if (attr->arg & FTARG_SIZE)
+               return (*attr->size)(obj, startoff, idx);
+       return (int)(__psint_t)attr->size;
+}
diff --git a/db/field.h b/db/field.h
new file mode 100644 (file)
index 0000000..c5249f7
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Field type identifiers.
+ *
+ * These values are used directly as indices into ftattrtab[] (fsize() and
+ * the flist code do &ftattrtab[f->ftyp] and assert that the entry's ftyp
+ * matches), so the enumerators must stay in the same order as the
+ * ftattrtab[] initializer.
+ */
+typedef enum fldt      {
+       FLDT_AEXTNUM,
+       FLDT_AGBLOCK,
+       FLDT_AGBLOCKNZ,
+       FLDT_AGF,
+       FLDT_AGFL,
+       FLDT_AGI,
+       FLDT_AGINO,
+       FLDT_AGINONN,
+       FLDT_AGNUMBER,
+       FLDT_ATTR,
+       FLDT_ATTR_BLKINFO,
+       FLDT_ATTR_LEAF_ENTRY,
+       FLDT_ATTR_LEAF_HDR,
+       FLDT_ATTR_LEAF_MAP,
+       FLDT_ATTR_LEAF_NAME,
+       FLDT_ATTR_NODE_ENTRY,
+       FLDT_ATTR_NODE_HDR,
+       FLDT_ATTR_SF_ENTRY,
+       FLDT_ATTR_SF_HDR,
+       FLDT_ATTRBLOCK,
+       FLDT_ATTRSHORT,
+       FLDT_BMAPBTA,
+       FLDT_BMAPBTAKEY,
+       FLDT_BMAPBTAPTR,
+       FLDT_BMAPBTAREC,
+       FLDT_BMAPBTD,
+       FLDT_BMAPBTDKEY,
+       FLDT_BMAPBTDPTR,
+       FLDT_BMAPBTDREC,
+       FLDT_BMROOTA,
+       FLDT_BMROOTAKEY,
+       FLDT_BMROOTAPTR,
+       FLDT_BMROOTD,
+       FLDT_BMROOTDKEY,
+       FLDT_BMROOTDPTR,
+       FLDT_BNOBT,
+       FLDT_BNOBTKEY,
+       FLDT_BNOBTPTR,
+       FLDT_BNOBTREC,
+       FLDT_CEXTFLG,
+       FLDT_CEXTLEN,
+       FLDT_CFILEOFFA,
+       FLDT_CFILEOFFD,
+       FLDT_CFSBLOCK,
+       FLDT_CHARNS,
+       FLDT_CHARS,
+       FLDT_CNTBT,
+       FLDT_CNTBTKEY,
+       FLDT_CNTBTPTR,
+       FLDT_CNTBTREC,
+       FLDT_DEV,
+       FLDT_DFILOFFA,
+       FLDT_DFILOFFD,
+       FLDT_DFSBNO,
+       FLDT_DINODE_A,
+       FLDT_DINODE_CORE,
+       FLDT_DINODE_FMT,
+       FLDT_DINODE_U,
+       FLDT_DIR,
+       FLDT_DIR2,
+       FLDT_DIR2_BLOCK_TAIL,
+       FLDT_DIR2_DATA_FREE,
+       FLDT_DIR2_DATA_HDR,
+       FLDT_DIR2_DATA_OFF,
+       FLDT_DIR2_DATA_OFFNZ,
+       FLDT_DIR2_DATA_UNION,
+       FLDT_DIR2_FREE_HDR,
+       FLDT_DIR2_INO4,
+       FLDT_DIR2_INO8,
+       FLDT_DIR2_INOU,
+       FLDT_DIR2_LEAF_ENTRY,
+       FLDT_DIR2_LEAF_HDR,
+       FLDT_DIR2_LEAF_TAIL,
+       FLDT_DIR2_SF_ENTRY,
+       FLDT_DIR2_SF_HDR,
+       FLDT_DIR2_SF_OFF,
+       FLDT_DIR2SF,
+       FLDT_DIR_BLKINFO,
+       FLDT_DIR_INO,
+       FLDT_DIR_LEAF_ENTRY,
+       FLDT_DIR_LEAF_HDR,
+       FLDT_DIR_LEAF_MAP,
+       FLDT_DIR_LEAF_NAME,
+       FLDT_DIR_NODE_ENTRY,
+       FLDT_DIR_NODE_HDR,
+       FLDT_DIR_SF_ENTRY,
+       FLDT_DIR_SF_HDR,
+       FLDT_DIRBLOCK,
+       FLDT_DIRSHORT,
+       FLDT_DISK_DQUOT,
+       FLDT_DQBLK,
+       FLDT_DQID,
+       FLDT_DRFSBNO,
+       FLDT_DRTBNO,
+       FLDT_EXTLEN,
+       FLDT_EXTNUM,
+       FLDT_FSIZE,
+       FLDT_INO,
+       FLDT_INOBT,
+       FLDT_INOBTKEY,
+       FLDT_INOBTPTR,
+       FLDT_INOBTREC,
+       FLDT_INODE,
+       FLDT_INOFREE,
+       FLDT_INT16D,
+       FLDT_INT32D,
+       FLDT_INT64D,
+       FLDT_INT8D,
+       FLDT_NSEC,
+       FLDT_QCNT,
+       FLDT_QWARNCNT,
+       FLDT_SB,
+       FLDT_TIME,
+       FLDT_TIMESTAMP,
+       FLDT_UINT1,
+       FLDT_UINT16D,
+       FLDT_UINT16O,
+       FLDT_UINT16X,
+       FLDT_UINT32D,
+       FLDT_UINT32O,
+       FLDT_UINT32X,
+       FLDT_UINT64D,
+       FLDT_UINT64O,
+       FLDT_UINT64X,
+       FLDT_UINT8D,
+       FLDT_UINT8O,
+       FLDT_UINT8X,
+       FLDT_UUID,
+       FLDT_ZZZ                        /* mark last entry */
+} fldt_t;
+
+/*
+ * A field's offset lives in a function-pointer slot.  When FLD_OFFSET is
+ * set the slot holds a real callback; otherwise it holds an integer
+ * constant cast into the pointer with OI() and cast back by bitoffset().
+ */
+typedef int (*offset_fnc_t)(void *obj, int startoff, int idx);
+#define        OI(o)   ((offset_fnc_t)(__psint_t)(o))
+
+/*
+ * Same scheme for a field's element count: a callback when FLD_COUNT is
+ * set, otherwise an integer constant wrapped with CI() and unwrapped by
+ * fcount().  C1 is the constant count 1.
+ */
+typedef int (*count_fnc_t)(void *obj, int startoff);
+#define        CI(c)   ((count_fnc_t)(__psint_t)(c))
+#define        C1      CI(1)
+
+/*
+ * Description of one named field inside a parent object.  Field tables
+ * are arrays of these terminated by an entry whose name is NULL (that is
+ * how findfield() and flist_expand_structs() detect the end).
+ */
+typedef struct field
+{
+       char            *name;          /* name matched by findfield() */
+       fldt_t          ftyp;           /* field type, index into ftattrtab[] */
+       offset_fnc_t    offset;         /* constant or callback, see FLD_OFFSET */
+       count_fnc_t     count;          /* constant or callback, see FLD_COUNT */
+       int             flags;          /* FLD_* flag bits */
+       typnm_t         next;           /* NOTE(review): not used by the code in
+                                        * this header's visible users; presumably
+                                        * the type this field leads to - confirm */
+} field_t;
+
+/*
+ * flag values for the flags member of field_t (tested as a bit mask)
+ *
+ * FLD_OFFSET, FLD_ARRAY and FLD_ABASE1 are interpreted by bitoffset(),
+ * FLD_COUNT by fcount(), and FLD_SKIPALL by flist_expand_structs().
+ */
+#define        FLD_ABASE1      1       /* field array base is 1 not 0 */
+#define        FLD_SKIPALL     2       /* skip this field in an all-fields print */
+#define        FLD_ARRAY       4       /* this field is an array */
+#define        FLD_OFFSET      8       /* offset value is a function pointer */
+#define        FLD_COUNT       16      /* count value is a function pointer */
+
+/*
+ * A type's size lives in a function-pointer slot: a real callback when
+ * FTARG_SIZE is set in the entry's arg, otherwise an integer constant
+ * wrapped with SI() and unwrapped by fsize().
+ */
+typedef int (*size_fnc_t)(void *obj, int startoff, int idx);
+#define        SI(s)   ((size_fnc_t)(__psint_t)(s))
+
+/*
+ * Per-type attributes.  ftattrtab[] holds one entry per fldt_t value and
+ * is indexed by that value.
+ */
+typedef struct ftattr
+{
+       fldt_t          ftyp;           /* this entry's own type; asserted to
+                                        * equal the index used to reach it */
+       char            *name;          /* type name (shown by flist_print) */
+       prfnc_t         prfunc;         /* print function; NULL makes
+                                        * flist_parse() expand subfld instead */
+       char            *fmtstr;        /* format string for prfunc; entries
+                                        * with a NULL prfunc store their field
+                                        * table here, cast to char * */
+       size_fnc_t      size;           /* constant or callback, see FTARG_SIZE */
+       int             arg;            /* FTARG_* flag bits */
+       adfnc_t         adfunc;         /* NOTE(review): presumably an "address"
+                                        * helper (fa_ino, fa_agblock, ...);
+                                        * not called in this header's users
+                                        * shown here - confirm */
+       const field_t   *subfld;        /* subfield table, or NULL if none */
+} ftattr_t;
+extern const ftattr_t  ftattrtab[];
+
+/*
+ * arg values for the arg member of ftattr_t (tested as a bit mask);
+ * FTARG_SIZE is the one consulted by fsize() and bitoffset()
+ */
+#define        FTARG_SKIPZERO  1       /* skip 0 words */
+#define        FTARG_DONULL    2       /* make -1 words be "null" */
+#define        FTARG_SKIPNULL  4       /* skip -1 words */
+#define        FTARG_SIGNED    8       /* field value is signed */
+#define        FTARG_SIZE      16      /* size field is a function */
+#define        FTARG_SKIPNMS   32      /* skip printing names this time */
+#define        FTARG_OKEMPTY   64      /* ok if this (union type) is empty */
+
+/* offset of element idx of field f (constant or via the offset callback) */
+extern int             bitoffset(const field_t *f, void *obj, int startoff,
+                                 int idx);
+/* element count of field f (constant or via the count callback) */
+extern int             fcount(const field_t *f, void *obj, int startoff);
+/* entry of fields named name with a non-zero count, or NULL if none */
+extern const field_t   *findfield(char *name, const field_t *fields,
+                                  void *obj, int startoff);
+/* size of element idx of field f, looked up through ftattrtab[] */
+extern int             fsize(const field_t *f, void *obj, int startoff,
+                                 int idx);
diff --git a/db/flist.c b/db/flist.c
new file mode 100644 (file)
index 0000000..fd5e191
--- /dev/null
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "debug.h"
+#include "output.h"
+#include "malloc.h"
+
+static void    flist_expand_arrays(flist_t *fl);
+static void    flist_expand_structs(flist_t *fl, void *obj);
+static flist_t *flist_replicate(flist_t *fl);
+static ftok_t  *flist_split(char *s);
+static void    ftok_free(ftok_t *ft);
+
+/*
+ * Split an flist entry covering the index range low..high of an array
+ * field into one entry per index.
+ *
+ * fl itself is narrowed to the single index low; a new entry is created
+ * for every further index up to high, each carrying its own copy of fl's
+ * child list, and the new entries are spliced in between fl and fl's
+ * original sibling.
+ */
+static void
+flist_expand_arrays(
+       flist_t         *fl)
+{
+       const field_t   *fld = fl->fld;
+#ifdef DEBUG
+       const ftattr_t  *fa = &ftattrtab[fld->ftyp];
+#endif
+       int             first = fl->low;
+       int             last = fl->high;
+       flist_t         *rest = fl->sibling;
+       flist_t         *tail = fl;
+       flist_t         *elem;
+       int             i;
+
+       ASSERT(fa->ftyp == fld->ftyp);
+       ASSERT(fld->flags & FLD_ARRAY);
+
+       /* the original entry now stands for the first index only */
+       fl->high = first;
+
+       for (i = first + 1; i <= last; i++) {
+               elem = flist_make(fld->name);
+               elem->fld = fld;
+               elem->low = i;
+               elem->high = i;
+               elem->flags |= FL_OKLOW | FL_OKHIGH;
+               elem->child = flist_replicate(fl->child);
+               tail->sibling = elem;
+               tail = elem;
+       }
+
+       /* re-attach whatever followed the original entry */
+       tail->sibling = rest;
+}
+
+/*
+ * Give fl a child entry for every subfield of its type.
+ *
+ * Subfields whose count (evaluated against obj at fl->offset) is zero and
+ * subfields flagged FLD_SKIPALL are left out.  fl must not have children
+ * yet and its type must have a subfield table.
+ */
+static void
+flist_expand_structs(
+       flist_t         *fl,
+       void            *obj)
+{
+       const ftattr_t  *fa = &ftattrtab[fl->fld->ftyp];
+       const field_t   *sub;
+       flist_t         **link = &fl->child;
+       flist_t         *node;
+
+       ASSERT(fa->ftyp == fl->fld->ftyp);
+       ASSERT(fa->subfld != NULL);
+       ASSERT(fl->child == NULL);
+
+       for (sub = fa->subfld; sub->name != NULL; sub++) {
+               if (fcount(sub, obj, fl->offset) == 0 ||
+                   (sub->flags & FLD_SKIPALL))
+                       continue;
+               node = flist_make(sub->name);
+               node->fld = sub;
+               /* append: link is fl->child first, then the last sibling */
+               *link = node;
+               link = &node->sibling;
+       }
+}
+
+/*
+ * Free an flist entry together with its children and all of its
+ * following siblings (names included).
+ *
+ * Siblings are walked iteratively so that the recursion depth is bounded
+ * by the nesting depth of the list rather than by the number of siblings
+ * (flist_expand_arrays() creates one sibling per array element).  A NULL
+ * fl is accepted and does nothing.
+ */
+void
+flist_free(
+       flist_t *fl)
+{
+       flist_t *next;
+
+       while (fl != NULL) {
+               /* fetch the link before the node is released */
+               next = fl->sibling;
+               if (fl->child)
+                       flist_free(fl->child);
+               if (fl->name)
+                       xfree(fl->name);
+               xfree(fl);
+               fl = next;
+       }
+}
+
+/*
+ * Allocate a new flist entry holding a private copy of name.  All other
+ * members start out as NULL / 0.  Release it with flist_free().
+ */
+flist_t *
+flist_make(
+       char    *name)
+{
+       flist_t *node = xmalloc(sizeof(*node));
+
+       node->name = xstrdup(name);
+       node->fld = NULL;
+       node->child = node->sibling = NULL;
+       node->low = node->high = 0;
+       node->flags = 0;
+       node->offset = 0;
+       return node;
+}
+
+/*
+ * Resolve the flist chain starting at fl against the field table fields.
+ *
+ * For every entry (following sibling links) the field is looked up by
+ * name, array indices are validated or defaulted to the full range, the
+ * entry's offset is computed relative to startoff, index ranges are
+ * expanded into one entry per element where needed, struct-like types
+ * without an explicit child get all their subfields as children, and
+ * children are then resolved recursively against the type's subfield
+ * table.
+ *
+ * Returns 1 on success.  On the first error a message is printed with
+ * dbprintf() and 0 is returned; the list is left partially resolved.
+ */
+int
+flist_parse(
+       const field_t   *fields,
+       flist_t         *fl,
+       void            *obj,
+       int             startoff)
+{
+       const field_t   *f;
+       const ftattr_t  *fa;
+       int             high;
+       int             low;
+
+       while (fl) {
+               f = findfield(fl->name, fields, obj, startoff);
+               if (f == NULL) {
+                       dbprintf("field %s not found\n", fl->name);
+                       return 0;
+               }
+               fl->fld = f;
+               fa = &ftattrtab[f->ftyp];
+               ASSERT(fa->ftyp == f->ftyp);
+               if (f->flags & FLD_ARRAY) {
+                       /* valid index range: base (0 or 1) .. base+count-1 */
+                       low = (f->flags & FLD_ABASE1) != 0;
+                       high = fcount(f, obj, startoff) + low - 1;
+                       if (low > high) {
+                               dbprintf("no elements in %s\n", fl->name);
+                               return 0;
+                       }
+                       if (fl->flags & FL_OKHIGH) {
+                               /* explicit low-high range was given */
+                               if (fl->low < low || fl->low > high ||
+                                   fl->high < low || fl->high > high) {
+                                       dbprintf("indices %d-%d for field %s "
+                                                "out of range %d-%d\n",
+                                               fl->low, fl->high, fl->name,
+                                               low, high);
+                                       return 0;
+                               }
+                       } else if (fl->flags & FL_OKLOW) {
+                               /* single index: treat it as range idx-idx */
+                               if (fl->low < low || fl->low > high) {
+                                       dbprintf("index %d for field %s out of "
+                                                "range %d-%d\n",
+                                               fl->low, fl->name, low, high);
+                                       return 0;
+                               }
+                               fl->high = fl->low;
+                               fl->flags |= FL_OKHIGH;
+                       } else {
+                               /* no index given: select every element */
+                               fl->low = low;
+                               fl->high = high;
+                               fl->flags |= FL_OKLOW | FL_OKHIGH;
+                       }
+               } else {
+                       if (fl->flags & FL_OKLOW) {
+                               dbprintf("field %s is not an array\n",
+                                       fl->name);
+                               return 0;
+                       }
+               }
+               fl->offset = startoff + bitoffset(f, obj, startoff, fl->low);
+               /*
+                * A multi-element range that has children, or whose type
+                * has no print function, is split into one entry per
+                * element.  The new entries become siblings of fl and are
+                * therefore visited by later iterations of this loop.
+                */
+               if ((fl->child != NULL || fa->prfunc == NULL) &&
+                   (f->flags & FLD_ARRAY) && fl->low != fl->high)
+                       flist_expand_arrays(fl);
+               /* no print function and no explicit subfield: take them all */
+               if (fa->prfunc == NULL && fl->child == NULL)
+                       flist_expand_structs(fl, obj);
+               if (fl->child) {
+                       if (fa->subfld == NULL) {
+                               dbprintf("field %s has no subfields\n",
+                                       fl->name);
+                               return 0;
+                       }
+                       /* children are resolved relative to this entry */
+                       if (!flist_parse(fa->subfld, fl->child, obj,
+                                       fl->offset))
+                               return 0;
+               }
+               fl = fl->sibling;
+       }
+       return 1;
+}
+
+/*
+ * Debugging aid: dump the flist chain starting at fl, children included,
+ * through dbprintf().  Does nothing unless DEBUG_FLIST is set in
+ * debug_state.  Every entry's fld is dereferenced, so the entries must
+ * already have their fld set.
+ */
+void
+flist_print(
+       flist_t *fl)
+{
+       const field_t   *fld;
+
+       if ((debug_state & DEBUG_FLIST) == 0)
+               return;
+       for (; fl != NULL; fl = fl->sibling) {
+               fld = fl->fld;
+               dbprintf("fl@%p:\n", fl);
+               dbprintf("\tname=%s, fld=%p, child=%p, sibling=%p\n",
+                       fl->name, fld, fl->child, fl->sibling);
+               dbprintf("\tlow=%d, high=%d, flags=%d (%s%s), offset=%d\n",
+                       fl->low, fl->high, fl->flags,
+                       (fl->flags & FL_OKLOW) ? "oklow " : "",
+                       (fl->flags & FL_OKHIGH) ? "okhigh" : "", fl->offset);
+               dbprintf("\tfld->name=%s, fld->ftyp=%d (%s)\n",
+                       fld->name, fld->ftyp, ftattrtab[fld->ftyp].name);
+               dbprintf("\tfld->flags=%d (%s%s%s%s%s)\n", fld->flags,
+                       (fld->flags & FLD_ABASE1) ? "abase1 " : "",
+                       (fld->flags & FLD_SKIPALL) ? "skipall " : "",
+                       (fld->flags & FLD_ARRAY) ? "array " : "",
+                       (fld->flags & FLD_OFFSET) ? "offset " : "",
+                       (fld->flags & FLD_COUNT) ? "count " : "");
+               if (fl->child != NULL)
+                       flist_print(fl->child);
+       }
+}
+
+/*
+ * Return a deep copy of the flist entry f, including its children and
+ * its following siblings.  A NULL f yields NULL.
+ */
+static flist_t *
+flist_replicate(
+       flist_t *f)
+{
+       flist_t *copy;
+
+       if (!f)
+               return NULL;
+       copy = flist_make(f->name);
+       copy->fld = f->fld;
+       copy->low = f->low;
+       copy->high = f->high;
+       copy->flags = f->flags;
+       copy->offset = f->offset;
+       copy->child = flist_replicate(f->child);
+       copy->sibling = flist_replicate(f->sibling);
+       return copy;
+}
+
+/*
+ * Convert the token t into an array index.  Succeeds (returns 1 and
+ * stores the value in *val) only when t is a TT_NUM token that strtoul()
+ * with base 0 consumes completely; otherwise returns 0 and leaves *val
+ * alone.
+ */
+static int
+flist_scan_index(
+       ftok_t  *t,
+       int     *val)
+{
+       char    *end;
+       int     num;
+
+       if (t->tokty != TT_NUM)
+               return 0;
+       num = (int)strtoul(t->tok, &end, 0);
+       if (*end != '\0')
+               return 0;
+       *val = num;
+       return 1;
+}
+
+/*
+ * Turn a field specification such as "a.b[3].c" or "x[1-4]" into a chain
+ * of flist entries, each name becoming the child of the one before it.
+ *
+ * A single index sets FL_OKLOW and low; a range additionally sets
+ * FL_OKHIGH and high.  Returns the head of the chain, or NULL when the
+ * string cannot be tokenized, holds no tokens, or is malformed (in the
+ * last case "bad syntax in field name" is printed).
+ */
+flist_t *
+flist_scan(
+       char    *name)
+{
+       flist_t *head = NULL;
+       flist_t *last = NULL;
+       flist_t *cur;
+       ftok_t  *toks;
+       ftok_t  *t;
+       int     num;
+
+       toks = flist_split(name);
+       if (toks == NULL)
+               return NULL;
+
+       for (t = toks; t->tokty != TT_END; ) {
+               /* every component starts with a name */
+               if (t->tokty != TT_NAME)
+                       goto bad;
+               cur = flist_make(t->tok);
+               if (last)
+                       last->child = cur;
+               else
+                       head = cur;
+               last = cur;
+               t++;
+
+               /* optional "[low]" or "[low-high]" */
+               if (t->tokty == TT_LB) {
+                       t++;
+                       if (!flist_scan_index(t, &num))
+                               goto bad;
+                       cur->flags |= FL_OKLOW;
+                       cur->low = num;
+                       t++;
+                       if (t->tokty == TT_DASH) {
+                               t++;
+                               if (!flist_scan_index(t, &num))
+                                       goto bad;
+                               cur->flags |= FL_OKHIGH;
+                               cur->high = num;
+                               t++;
+                       }
+                       if (t->tokty != TT_RB)
+                               goto bad;
+                       t++;
+               }
+
+               /* a '.' must be followed by another component */
+               if (t->tokty == TT_DOT) {
+                       t++;
+                       if (t->tokty == TT_END)
+                               goto bad;
+               }
+       }
+       ftok_free(toks);
+       return head;
+
+bad:
+       dbprintf("bad syntax in field name %s\n", name);
+       ftok_free(toks);
+       if (head)
+               flist_free(head);
+       return NULL;
+}
+
+/*
+ * Split the field specification s into an array of tokens.
+ *
+ * Token kinds:
+ *   TT_STRING  text between a leading '"' and the LAST '"' in s
+ *              (quotes themselves are dropped)
+ *   TT_NAME    a lowercase letter followed by lowercase letters, digits
+ *              or '_'
+ *   TT_NUM     a digit followed by digits, hex digits or 'x'/'X'; the
+ *              text is validated later by strtoul() in flist_scan()
+ *   TT_LB, TT_DASH, TT_RB, TT_DOT   the characters '[', '-', ']', '.'
+ *
+ * Returns a vector allocated with xmalloc/xrealloc, terminated by an
+ * entry with tok == NULL and tokty == TT_END; release it with
+ * ftok_free().  On a bad character or a missing closing quote a message
+ * is printed, everything allocated so far is released and NULL is
+ * returned.
+ *
+ * The vector is kept terminated after every appended token so that the
+ * error paths can hand it to ftok_free(), which walks until tok == NULL.
+ */
+static ftok_t *
+flist_split(
+       char            *s)
+{
+       static const char       initidchar[] = "abcdefghijklmnopqrstuvwxyz";
+       static const char       idchars[] =
+               "abcdefghijklmnopqrstuvwxyz0123456789_";
+       static const char       numchars[] = "0123456789";
+       /* digits, hex digits and the 0x/0X prefix letter */
+       static const char       xnumchars[] = "0123456789abcdefABCDEFxX";
+       static const char       punctchars[] = "[-].";
+       static const tokty_t    puncttypes[] =
+               { TT_LB, TT_DASH, TT_RB, TT_DOT };
+       const char      *hit;
+       char            *a;
+       int             l;
+       int             nv = 0;
+       int             tailskip;
+       tokty_t         t;
+       ftok_t          *v;
+
+       v = xmalloc(sizeof(*v));
+       v[0].tok = NULL;
+       v[0].tokty = TT_END;
+       while (*s) {
+               tailskip = 0;
+               /* need to add string handling */
+               if (*s == '\"') {
+                       s++; /* skip first quote */
+                       a = strrchr(s, '\"');
+                       if (a == NULL) {
+                               dbprintf("missing closing quote %s\n", s);
+                               ftok_free(v);
+                               return NULL;
+                       }
+                       tailskip = 1; /* skip remaining quote */
+                       l = (int)(a - s);
+                       t = TT_STRING;
+               } else if (strchr(initidchar, *s)) {
+                       l = (int)strspn(s, idchars);
+                       t = TT_NAME;
+               } else if (strchr(numchars, *s)) {
+                       l = (int)strspn(s, xnumchars);
+                       t = TT_NUM;
+               } else if ((hit = strchr(punctchars, *s)) != NULL) {
+                       l = 1;
+                       t = puncttypes[hit - punctchars];
+               } else {
+                       dbprintf("bad character in field %s\n", s);
+                       ftok_free(v);
+                       return NULL;
+               }
+               a = xmalloc(l + 1);
+               memcpy(a, s, l);
+               a[l] = '\0';
+               /* room for this token plus the terminating entry */
+               v = xrealloc(v, (nv + 2) * sizeof(*v));
+               v[nv].tok = a;
+               v[nv].tokty = t;
+               nv++;
+               v[nv].tok = NULL;
+               v[nv].tokty = TT_END;
+               s += l + tailskip;
+       }
+       return v;
+}
+
+/*
+ * Release a token vector produced by flist_split(): every token string
+ * up to (not including) the first entry whose tok is NULL, then the
+ * vector itself.
+ */
+static void
+ftok_free(
+       ftok_t  *ft)
+{
+       int     i;
+
+       for (i = 0; ft[i].tok != NULL; i++)
+               xfree(ft[i].tok);
+       xfree(ft);
+}
diff --git a/db/flist.h b/db/flist.h
new file mode 100644 (file)
index 0000000..eab154d
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+/*
+ * One component of a parsed field specification.  Components separated
+ * by '.' are linked through child; entries at the same level are linked
+ * through sibling.
+ */
+typedef struct flist {
+       char                    *name;          /* private copy of the name */
+       const struct field      *fld;           /* resolved field; NULL until
+                                                * flist_parse() sets it */
+       struct flist            *child;         /* first subfield entry */
+       struct flist            *sibling;       /* next entry at this level */
+       int                     low;            /* first array index */
+       int                     high;           /* last array index */
+       int                     flags;          /* FL_* flag bits */
+       int                     offset;         /* set by flist_parse(): start
+                                                * offset plus bitoffset() */
+} flist_t;
+
+/*
+ * Flags for flist
+ *
+ * FL_OKLOW:  low holds a valid index (given by the user or filled in by
+ *            flist_parse()).
+ * FL_OKHIGH: high holds a valid index (ditto).
+ */
+#define        FL_OKLOW        1
+#define        FL_OKHIGH       2
+
+/*
+ * Token kinds produced by the field-name tokenizer: name, number, quoted
+ * string, '[', ']', '-', '.', and the end-of-vector marker.
+ */
+typedef enum tokty {
+       TT_NAME, TT_NUM, TT_STRING, TT_LB, TT_RB, TT_DASH, TT_DOT, TT_END
+} tokty_t;
+
+/*
+ * One token.  Token vectors end with an entry whose tok is NULL and whose
+ * tokty is TT_END.
+ */
+typedef struct ftok {
+       char    *tok;           /* allocated token text, NULL in the last entry */
+       tokty_t tokty;          /* kind of token */
+} ftok_t;
+
+/* free fl, its children and its following siblings */
+extern void    flist_free(flist_t *fl);
+/* allocate an empty entry holding a copy of name */
+extern flist_t *flist_make(char *name);
+/* resolve fl against fields; returns 1 on success, 0 after an error message */
+extern int     flist_parse(const struct field *fields, flist_t *fl, void *obj,
+                           int startoff);
+/* debug dump of fl, active only when DEBUG_FLIST is set in debug_state */
+extern void    flist_print(flist_t *fl);
+/* parse a field specification string; returns NULL on bad syntax */
+extern flist_t *flist_scan(char *name);
diff --git a/db/fprint.c b/db/fprint.c
new file mode 100644 (file)
index 0000000..3e4a9bf
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <time.h>
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "inobt.h"
+#include "bit.h"
+#include "print.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+
+/*
+ * Print count bytes of obj, starting at bit offset bit, as one double-quoted
+ * string using C escape sequences.
+ *
+ * bit must be byte aligned and size must be the width of a char (both are
+ * asserted).  fmtstr, arg, base and array are not used.  Printing stops
+ * early once seenint() reports true.  Always returns 1.
+ */
+int
+fp_charns(
+       void    *obj,
+       int     bit,
+       int     count,
+       char    *fmtstr,
+       int     size,
+       int     arg,
+       int     base,
+       int     array)
+{
+       int             i;
+       unsigned char   *p;     /* unsigned: safe for isgraph() and octal output */
+
+       ASSERT(bitoffs(bit) == 0);
+       ASSERT(size == bitsz(char));
+       dbprintf("\"");
+       for (i = 0, p = (unsigned char *)obj + byteize(bit);
+            i < count && !seenint();
+            i++, p++) {
+               switch (*p) {
+               case '\\':
+               case '\'':
+               case '"':
+               case '\?':
+                       /* printable, but needs a backslash inside a literal */
+                       dbprintf("\\%c", *p);
+                       break;
+               /*
+                * Named control characters.  Their codes are not in the
+                * same order as their escape letters, so each one is
+                * spelled out instead of being computed from '\a'.
+                */
+               case '\a':
+                       dbprintf("\\a");
+                       break;
+               case '\b':
+                       dbprintf("\\b");
+                       break;
+               case '\f':
+                       dbprintf("\\f");
+                       break;
+               case '\n':
+                       dbprintf("\\n");
+                       break;
+               case '\r':
+                       dbprintf("\\r");
+                       break;
+               case '\t':
+                       dbprintf("\\t");
+                       break;
+               case '\v':
+                       dbprintf("\\v");
+                       break;
+               default:
+                       if (isgraph(*p) || *p == ' ')
+                               dbprintf("%c", *p);
+                       else
+                               dbprintf("\\%03o", *p); /* anything else: octal */
+                       break;
+               }
+       }
+       dbprintf("\"");
+       return 1;
+}
+
+/*
+ * Print count integers, each size bits wide, starting at bit offset bit of
+ * obj.  Every value is formatted with fmtstr; values wider than 32 bits are
+ * passed as 64-bit, the others as 32-bit.
+ *
+ * arg carries FTARG_* flags: SIGNED selects signed extraction, SKIPZERO and
+ * SKIPNULL suppress zero / all-ones values, DONULL prints "null" for an
+ * all-ones value.  When array is set each value is prefixed with its index
+ * (i + base).  Stops early once seenint() reports true.  Always returns 1.
+ */
+int
+fp_num(
+       void            *obj,
+       int             bit,
+       int             count,
+       char            *fmtstr,
+       int             size,
+       int             arg,
+       int             base,
+       int             array)
+{
+       const int       is_signed = (arg & FTARG_SIGNED) != 0;
+       int             bitpos = bit;
+       int             i;
+       int             isnull;
+       __int64_t       val;
+
+       for (i = 0; i < count && !seenint(); i++, bitpos += size) {
+               val = getbitval(obj, bitpos, size,
+                       is_signed ? BVSIGNED : BVUNSIGNED);
+               if ((arg & FTARG_SKIPZERO) && val == 0)
+                       continue;
+               /*
+                * "null" is all ones: -1 for signed or full-width values,
+                * otherwise the largest value that fits in size bits.
+                */
+               if (is_signed || size == 64)
+                       isnull = (val == -1LL);
+               else
+                       isnull = (val == ((1LL << size) - 1LL));
+               if (isnull && (arg & FTARG_SKIPNULL))
+                       continue;
+               if (array)
+                       dbprintf("%d:", i + base);
+               if (isnull && (arg & FTARG_DONULL))
+                       dbprintf("null");
+               else if (size > 32)
+                       dbprintf(fmtstr, val);
+               else
+                       dbprintf(fmtstr, (__int32_t)val);
+               if (i < count - 1)
+                       dbprintf(" ");
+       }
+       return 1;
+}
+
+/*
+ * Print function for arrays of structures: forwards to print_sarray().
+ * For this printer fmtstr does not hold a printf format; it is cast to the
+ * field_t table that print_sarray() receives.  The FTARG_SKIPNMS bit of arg
+ * is passed on as a 0/1 value.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_sarray(
+       void    *obj,
+       int     bit,
+       int     count,
+       char    *fmtstr,
+       int     size,
+       int     arg,
+       int     base,
+       int     array)
+{
+       const field_t   *fields = (const field_t *)fmtstr;
+       int             skipnms = (arg & FTARG_SKIPNMS) != 0;
+
+       print_sarray(obj, bit, count, size, base, array, fields, skipnms);
+       return 1;
+}
+
+/*
+ * Print count timestamps, each size bits wide, starting at the byte-aligned
+ * bit offset bit of obj, in ctime() format (first 24 characters, i.e.
+ * without the trailing newline).
+ *
+ * The value is extracted using the field width (size), which is also the
+ * stride between elements; reading sizeof(time_t) * 8 bits instead would
+ * run into the neighbouring field wherever time_t is wider than the field.
+ * If ctime() cannot convert the value, the raw number is printed instead.
+ *
+ * When array is set each value is prefixed with its index (i + base).
+ * fmtstr and arg are not used.  Stops early once seenint() reports true.
+ * Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_time(
+       void    *obj,
+       int     bit,
+       int     count,
+       char    *fmtstr,
+       int     size,
+       int     arg,
+       int     base,
+       int     array)
+{
+       int     bitpos;
+       char    *c;
+       int     i;
+       time_t  t;
+
+       ASSERT(bitoffs(bit) == 0);
+       for (i = 0, bitpos = bit;
+            i < count && !seenint();
+            i++, bitpos += size) {
+               if (array)
+                       dbprintf("%d:", i + base);
+               t = (time_t)getbitval((char *)obj + byteize(bitpos), 0,
+                               size, 0);
+               c = ctime(&t);
+               if (c != NULL)
+                       dbprintf("%24.24s", c);
+               else
+                       dbprintf("%lld", (long long)t);
+               if (i < count - 1)
+                       dbprintf(" ");
+       }
+       return 1;
+}
+
+/*
+ * Print count consecutive uuid_t values starting at the byte-aligned bit
+ * offset bit of obj, each converted to text with uuid_unparse().  When array
+ * is set each value is prefixed with its index (n + base).  fmtstr, size and
+ * arg are not used.  Stops early once seenint() reports true.  Always
+ * returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_uuid(
+       void    *obj,
+       int     bit,
+       int     count,
+       char    *fmtstr,
+       int     size,
+       int     arg,
+       int     base,
+       int     array)
+{
+       char    str[40];        /* UUID string is 36 chars + trailing '\0' */
+       uuid_t  *up;
+       int     n;
+
+       ASSERT(bitoffs(bit) == 0);
+       up = (uuid_t *)((char *)obj + byteize(bit));
+       for (n = 0; n < count && !seenint(); n++, up++) {
+               if (array)
+                       dbprintf("%d:", n + base);
+               uuid_unparse(*up, str);
+               dbprintf("%s", str);
+               if (n < count - 1)
+                       dbprintf(" ");
+       }
+       return 1;
+}
diff --git a/db/fprint.h b/db/fprint.h
new file mode 100644 (file)
index 0000000..01c9385
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+typedef int (*prfnc_t)(void *obj, int bit, int count, char *fmtstr, int size,
+                      int arg, int base, int array);   /* signature shared by the fp_* print functions */
+
+extern int     fp_charns(void *obj, int bit, int count, char *fmtstr, int size,
+                         int arg, int base, int array);        /* bytes as a quoted, escaped string */
+extern int     fp_num(void *obj, int bit, int count, char *fmtstr, int size,
+                      int arg, int base, int array);   /* integers via fmtstr; arg holds FTARG_* flags */
+extern int     fp_sarray(void *obj, int bit, int count, char *fmtstr, int size,
+                         int arg, int base, int array);        /* struct arrays; fmtstr is really a field_t table */
+extern int     fp_time(void *obj, int bit, int count, char *fmtstr, int size,
+                       int arg, int base, int array);  /* timestamps in ctime() format */
+extern int     fp_uuid(void *obj, int bit, int count, char *fmtstr, int size,
+                       int arg, int base, int array);  /* uuid_t values as text */
diff --git a/db/frag.c b/db/frag.c
new file mode 100644 (file)
index 0000000..100d11a
--- /dev/null
+++ b/db/frag.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "frag.h"
+#include "io.h"
+#include "output.h"
+#include "type.h"
+#include "mount.h"
+#include "malloc.h"
+
+typedef struct extent {                        /* one extent of a file fork */
+       xfs_fileoff_t   startoff;       /* file offset where the extent starts */
+       xfs_filblks_t   blockcount;     /* extent length; startoff + blockcount is the next contiguous offset */
+} extent_t;
+
+typedef        struct extmap {                 /* growable list of one fork's extents */
+       int             naents;         /* entries allocated in ents[] */
+       int             nents;          /* entries in use */
+       extent_t        ents[1];        /* really naents entries; sized with EXTMAP_SIZE */
+} extmap_t;
+#define        EXTMAP_SIZE(n)  \
+       (offsetof(extmap_t, ents) + (sizeof(extent_t) * (n)))   /* bytes for an extmap_t with n entries */
+
+static int             aflag;          /* -a: count attribute forks */
+static int             dflag;          /* -d: count directory data forks */
+static __uint64_t      extcount_actual;        /* extents found so far */
+static __uint64_t      extcount_ideal;         /* contiguous runs found so far (see extmap_ideal) */
+static int             fflag;          /* -f: count regular file data forks */
+static int             lflag;          /* -l: count symlink data forks */
+static int             qflag;          /* -q: count the quota inodes (sb_uquotino/sb_pquotino) */
+static int             Rflag;          /* -R: count the realtime bitmap/summary inodes */
+static int             rflag;          /* -r: count files flagged XFS_DIFLAG_REALTIME */
+static int             vflag;          /* -v: print a line per processed inode */
+
+typedef void   (*scan_lbtree_f_t)(xfs_btree_lblock_t   *block, /* per-block callback invoked by scan_lbtree() */
+                                  int                  level,
+                                  extmap_t             **extmapp,
+                                  typnm_t              btype);
+
+typedef void   (*scan_sbtree_f_t)(xfs_btree_sblock_t   *block, /* per-block callback invoked by scan_sbtree() */
+                                  int                  level,
+                                  xfs_agf_t            *agf);
+
+static extmap_t                *extmap_alloc(xfs_extnum_t nex);
+static xfs_extnum_t    extmap_ideal(extmap_t *extmap);
+static void            extmap_set_ext(extmap_t **extmapp, xfs_fileoff_t o,
+                                      xfs_extlen_t c);
+static int             frag_f(int argc, char **argv);
+static int             init(int argc, char **argv);
+static void            process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs,
+                                            extmap_t **extmapp);
+static void            process_btinode(xfs_dinode_t *dip, extmap_t **extmapp,
+                                       int whichfork);
+static void            process_exinode(xfs_dinode_t *dip, extmap_t **extmapp,
+                                       int whichfork);
+static void            process_fork(xfs_dinode_t *dip, int whichfork);
+static void            process_inode(xfs_agf_t *agf, xfs_agino_t agino,
+                                     xfs_dinode_t *dip);
+static void            scan_ag(xfs_agnumber_t agno);
+static void            scan_lbtree(xfs_fsblock_t root, int nlevels,
+                                   scan_lbtree_f_t func, extmap_t **extmapp,
+                                   typnm_t btype);
+static void            scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root,
+                                   int nlevels, scan_sbtree_f_t func,
+                                   typnm_t btype);
+static void            scanfunc_bmap(xfs_btree_lblock_t *ablock, int level,
+                                     extmap_t **extmapp, typnm_t btype);
+static void            scanfunc_ino(xfs_btree_sblock_t *ablock, int level,
+                                    xfs_agf_t *agf);
+
+/*
+ * Command table entry for "frag", registered by frag_init() and handled by
+ * frag_f().  The usage string lists every option letter that init() accepts
+ * ("adflqRrv").
+ * NOTE(review): the meaning of the numeric members (0, -1, 0) comes from
+ * cmdinfo_t in command.h, which is not visible here.
+ */
+static const cmdinfo_t frag_cmd =
+       { "frag", NULL, frag_f, 0, -1, 0,
+         "[-a] [-d] [-f] [-l] [-q] [-R] [-r] [-v]",
+         "get file fragmentation data", NULL };
+
+/*
+ * Allocate an empty extent map with room for nex entries (at least one).
+ * The caller releases it with xfree().
+ */
+static extmap_t *
+extmap_alloc(
+       xfs_extnum_t    nex)
+{
+       xfs_extnum_t    slots = (nex < 1) ? 1 : nex;
+       extmap_t        *map = xmalloc(EXTMAP_SIZE(slots));
+
+       map->naents = slots;
+       map->nents = 0;
+       return map;
+}
+
+/*
+ * Return the number of contiguous runs in the map: the first entry starts a
+ * run, and so does every entry whose startoff is not exactly the previous
+ * entry's startoff + blockcount.
+ */
+static xfs_extnum_t
+extmap_ideal(
+       extmap_t        *extmap)
+{
+       xfs_extnum_t    runs = 0;
+       int             i;
+
+       for (i = 0; i < extmap->nents; i++) {
+               const extent_t  *cur = &extmap->ents[i];
+
+               /* continues the previous entry: same run, nothing to count */
+               if (i > 0 &&
+                   cur->startoff == cur[-1].startoff + cur[-1].blockcount)
+                       continue;
+               runs++;
+       }
+       return runs;
+}
+
+/*
+ * Append the extent (offset o, length c) to *extmapp.  When the map is full
+ * it is reallocated with one more slot, so *extmapp may change.
+ */
+static void
+extmap_set_ext(
+       extmap_t        **extmapp,
+       xfs_fileoff_t   o,
+       xfs_extlen_t    c)
+{
+       extmap_t        *map = *extmapp;
+       extent_t        *slot;
+
+       if (map->nents == map->naents) {
+               map->naents += 1;
+               map = xrealloc(map, EXTMAP_SIZE(map->naents));
+               *extmapp = map;
+       }
+       slot = &map->ents[map->nents++];
+       slot->startoff = o;
+       slot->blockcount = c;
+}
+
+void
+frag_init(void)                /* hands the frag_cmd table entry to add_command() */
+{
+       add_command(&frag_cmd);
+}
+
+/*
+ * "frag" command: get file fragmentation information.
+ *
+ * Parses the options, scans every allocation group and prints the actual and
+ * ideal extent counts plus the percentage of extents in excess of the ideal
+ * (0 when no extents were counted).  Always returns 0.
+ */
+static int
+frag_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agnumber_t  ag;
+       double          pct = 0.0;
+
+       if (!init(argc, argv))
+               return 0;
+       ag = 0;
+       while (ag < mp->m_sb.sb_agcount) {
+               scan_ag(ag);
+               ag++;
+       }
+       if (extcount_actual != 0)
+               pct = (double)(extcount_actual - extcount_ideal) * 100.0 /
+                     (double)extcount_actual;
+       dbprintf("actual %llu, ideal %llu, fragmentation factor %.2f%%\n",
+               extcount_actual, extcount_ideal, pct);
+       return 0;
+}
+
+/*
+ * Parse the frag command line and reset the extent counters.
+ *
+ * Each of -a -d -f -l -q -R -r -v sets the matching flag.  If none of the
+ * selection flags (everything except -v) was given, all of them are turned
+ * on.  Returns 1 on success, or 0 after reporting an unknown option.
+ */
+static int
+init(
+       int             argc,
+       char            **argv)
+{
+       int             opt;
+       int             selected;
+
+       aflag = 0;
+       dflag = 0;
+       fflag = 0;
+       lflag = 0;
+       qflag = 0;
+       Rflag = 0;
+       rflag = 0;
+       vflag = 0;
+       optind = 0;     /* restart getopt() for this command line */
+       for (;;) {
+               opt = getopt(argc, argv, "adflqRrv");
+               if (opt == EOF)
+                       break;
+               switch (opt) {
+               case 'a':
+                       aflag = 1;
+                       break;
+               case 'd':
+                       dflag = 1;
+                       break;
+               case 'f':
+                       fflag = 1;
+                       break;
+               case 'l':
+                       lflag = 1;
+                       break;
+               case 'q':
+                       qflag = 1;
+                       break;
+               case 'R':
+                       Rflag = 1;
+                       break;
+               case 'r':
+                       rflag = 1;
+                       break;
+               case 'v':
+                       vflag = 1;
+                       break;
+               default:
+                       dbprintf("bad option for frag command\n");
+                       return 0;
+               }
+       }
+       selected = aflag || dflag || fflag || lflag || qflag || Rflag || rflag;
+       if (!selected) {
+               /* no selection given: look at everything */
+               aflag = 1;
+               dflag = 1;
+               fflag = 1;
+               lflag = 1;
+               qflag = 1;
+               Rflag = 1;
+               rflag = 1;
+       }
+       extcount_actual = 0;
+       extcount_ideal = 0;
+       return 1;
+}
+
+/*
+ * Decode numrecs extent records starting at rp with convert_extent() and
+ * append each one's file offset and length to *extmapp.  The start block and
+ * flag that convert_extent() also returns are not used.
+ */
+static void
+process_bmbt_reclist(
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       extmap_t                **extmapp)
+{
+       xfs_dfiloff_t           off;
+       xfs_dfsbno_t            start;
+       xfs_dfilblks_t          len;
+       int                     flag;
+       int                     left;
+
+       for (left = numrecs; left > 0; left--, rp++) {
+               convert_extent((xfs_bmbt_rec_64_t *)rp, &off, &start, &len,
+                       &flag);
+               extmap_set_ext(extmapp, (xfs_fileoff_t)off, (xfs_extlen_t)len);
+       }
+}
+
+/*
+ * Collect the extents of a btree-format fork.
+ *
+ * The fork holds a bmap btree root block.  At level 0 the root itself holds
+ * the extent records; otherwise each of its pointers is followed with
+ * scan_lbtree(), which calls scanfunc_bmap() on the block it reads.
+ */
+static void
+process_btinode(
+       xfs_dinode_t            *dip,
+       extmap_t                **extmapp,
+       int                     whichfork)
+{
+       xfs_bmdr_block_t        *root;
+       xfs_bmbt_ptr_t          *ptrs;
+       xfs_bmbt_rec_32_t       *recs;
+       typnm_t                 btype;
+       int                     n;
+
+       root = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+       if (INT_GET(root->bb_level, ARCH_CONVERT) != 0) {
+               /* interior root: descend through every child pointer */
+               ptrs = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE(dip, mp, whichfork),
+                       xfs_bmdr, root, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(
+                               XFS_DFORK_SIZE(dip, mp, whichfork),
+                               xfs_bmdr, 0));
+               btype = whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA;
+               for (n = 0; n < INT_GET(root->bb_numrecs, ARCH_CONVERT); n++)
+                       scan_lbtree(
+                               (xfs_fsblock_t)INT_GET(ptrs[n], ARCH_CONVERT),
+                               INT_GET(root->bb_level, ARCH_CONVERT),
+                               scanfunc_bmap, extmapp, btype);
+               return;
+       }
+       /* leaf root: the records live in the fork itself */
+       recs = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+               XFS_DFORK_SIZE(dip, mp, whichfork),
+               xfs_bmdr, root, 1,
+               XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp, whichfork),
+                       xfs_bmdr, 1));
+       process_bmbt_reclist(recs, INT_GET(root->bb_numrecs, ARCH_CONVERT),
+               extmapp);
+}
+
+/*
+ * Collect the extents of an extents-format fork: the fork area is treated
+ * as an array of XFS_DFORK_NEXTENTS() extent records.
+ */
+static void
+process_exinode(
+       xfs_dinode_t            *dip,
+       extmap_t                **extmapp,
+       int                     whichfork)
+{
+       int     nrecs = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+       process_bmbt_reclist((xfs_bmbt_rec_32_t *)XFS_DFORK_PTR(dip, whichfork),
+               nrecs, extmapp);
+}
+
+/*
+ * Add one fork of an inode to the global extent counters.
+ *
+ * Forks with no extents are skipped.  Otherwise the fork's extents are
+ * gathered into a temporary map (only extents-format and btree-format forks
+ * contribute entries), extcount_actual grows by the number of entries and
+ * extcount_ideal by the number of contiguous runs among them.
+ */
+static void
+process_fork(
+       xfs_dinode_t    *dip,
+       int             whichfork)
+{
+       extmap_t        *map;
+       int             fmt;
+       int             count = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+       if (count == 0)
+               return;
+       map = extmap_alloc(count);
+       fmt = XFS_DFORK_FORMAT(dip, whichfork);
+       if (fmt == XFS_DINODE_FMT_EXTENTS)
+               process_exinode(dip, &map, whichfork);
+       else if (fmt == XFS_DINODE_FMT_BTREE)
+               process_btinode(dip, &map, whichfork);
+       extcount_actual += map->nents;
+       extcount_ideal += extmap_ideal(map);
+       xfree(map);
+}
+
+/*
+ * Account for one in-use inode, honouring the option flags.
+ *
+ * The data fork is processed for directories (-d), symlinks (-l) and regular
+ * files (-f); a regular file is left out when it is a realtime file without
+ * -r, the realtime bitmap/summary inode without -R, or a quota inode without
+ * -q.  Other inode types never have their data fork processed.  The
+ * attribute fork is processed when -a is set and the inode has one.  With -v
+ * a line with this inode's contribution is printed.
+ */
+static void
+process_inode(
+       xfs_agf_t               *agf,
+       xfs_agino_t             agino,
+       xfs_dinode_t            *dip)
+{
+       xfs_dinode_core_t       *core = &dip->di_core;
+       xfs_ino_t               ino;
+       __uint64_t              actual0;
+       __uint64_t              ideal0;
+       int                     excluded;
+       int                     want_data;
+       int                     want_attr;
+
+       ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino);
+       switch (INT_GET(core->di_mode, ARCH_CONVERT) & IFMT) {
+       case IFDIR:
+               want_data = dflag;
+               break;
+       case IFLNK:
+               want_data = lflag;
+               break;
+       case IFREG:
+               excluded =
+                   (!rflag && (INT_GET(core->di_flags, ARCH_CONVERT) &
+                               XFS_DIFLAG_REALTIME)) ||
+                   (!Rflag && (ino == mp->m_sb.sb_rbmino ||
+                               ino == mp->m_sb.sb_rsumino)) ||
+                   (!qflag && (ino == mp->m_sb.sb_uquotino ||
+                               ino == mp->m_sb.sb_pquotino));
+               want_data = !excluded && fflag;
+               break;
+       default:
+               want_data = 0;
+               break;
+       }
+       /* remember the totals so the per-inode delta can be reported */
+       actual0 = extcount_actual;
+       ideal0 = extcount_ideal;
+       if (want_data)
+               process_fork(dip, XFS_DATA_FORK);
+       want_attr = aflag && XFS_DFORK_Q(dip);
+       if (want_attr)
+               process_fork(dip, XFS_ATTR_FORK);
+       if (vflag && (want_data || want_attr))
+               dbprintf("inode %lld actual %lld ideal %lld\n",
+                       ino, extcount_actual - actual0,
+                       extcount_ideal - ideal0);
+}
+
+/*
+ * Scan one allocation group: read its AGF and AGI headers, then walk the
+ * inode btree rooted at agi_root with scanfunc_ino().  A header that cannot
+ * be read is reported and the AG is skipped.  Every push_cur() is matched by
+ * a pop_cur() on all paths.
+ */
+static void
+scan_ag(
+       xfs_agnumber_t  agno)
+{
+       xfs_agf_t       *agf;
+       xfs_agi_t       *agi;
+
+       push_cur();
+       set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agf = iocur_top->data;
+       if (agf == NULL) {
+               dbprintf("can't read agf block for ag %u\n", agno);
+               goto out_agf;
+       }
+       push_cur();
+       set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agi = iocur_top->data;
+       if (agi == NULL) {
+               dbprintf("can't read agi block for ag %u\n", agno);
+               goto out_agi;
+       }
+       scan_sbtree(agf,
+               INT_GET(agi->agi_root, ARCH_CONVERT),
+               INT_GET(agi->agi_level, ARCH_CONVERT),
+               scanfunc_ino, TYP_INOBT);
+out_agi:
+       pop_cur();
+out_agf:
+       pop_cur();
+}
+
+/*
+ * Read the long-format btree block at filesystem block root and hand it to
+ * func together with its level (nlevels - 1), extmapp and btype.
+ *
+ * The block is read on a cursor pushed for this call.  That cursor is popped
+ * again on every path, including when the read fails, so the cursor stack
+ * stays balanced for the caller (as scan_ag() does for its own cursors).
+ */
+static void
+scan_lbtree(
+       xfs_fsblock_t   root,
+       int             nlevels,
+       scan_lbtree_f_t func,
+       extmap_t        **extmapp,
+       typnm_t         btype)
+{
+       push_cur();
+       set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+               NULL);
+       if (iocur_top->data == NULL)
+               dbprintf("can't read btree block %u/%u\n",
+                       XFS_FSB_TO_AGNO(mp, root),
+                       XFS_FSB_TO_AGBNO(mp, root));
+       else
+               (*func)(iocur_top->data, nlevels - 1, extmapp, btype);
+       pop_cur();
+}
+
+/*
+ * Read the short-format btree block at AG block root of the allocation
+ * group named by agf->agf_seqno and hand it to func together with its level
+ * (nlevels - 1) and agf.
+ *
+ * The block is read on a cursor pushed for this call.  That cursor is popped
+ * again on every path, including when the read fails, so the cursor stack
+ * stays balanced for the caller (as scan_ag() does for its own cursors).
+ */
+static void
+scan_sbtree(
+       xfs_agf_t       *agf,
+       xfs_agblock_t   root,
+       int             nlevels,
+       scan_sbtree_f_t func,
+       typnm_t         btype)
+{
+       xfs_agnumber_t  seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+       push_cur();
+       set_cur(&typtab[btype], XFS_AGB_TO_DADDR(mp, seqno, root),
+               blkbb, DB_RING_IGN, NULL);
+       if (iocur_top->data == NULL)
+               dbprintf("can't read btree block %u/%u\n", seqno, root);
+       else
+               (*func)(iocur_top->data, nlevels - 1, agf);
+       pop_cur();
+}
+
+/*
+ * scan_lbtree() callback for bmap btree blocks.  A level-0 block contributes
+ * its extent records to *extmapp; any other block has each of its child
+ * pointers followed with scan_lbtree().
+ *
+ * NOTE(review): the pointer array is located with mp->m_bmap_dmxr[0], the
+ * same index used for the records, whereas scanfunc_ino() uses index 1 of
+ * its table for pointers.  Confirm whether index 1 is intended here.
+ */
+static void
+scanfunc_bmap(
+       xfs_btree_lblock_t      *ablock,
+       int                     level,
+       extmap_t                **extmapp,
+       typnm_t                 btype)
+{
+       xfs_bmbt_block_t        *block = (xfs_bmbt_block_t *)ablock;
+       xfs_bmbt_ptr_t          *ptrs;
+       xfs_bmbt_rec_32_t       *recs;
+       int                     n;
+
+       if (level != 0) {
+               ptrs = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                       block, 1, mp->m_bmap_dmxr[0]);
+               for (n = 0; n < INT_GET(block->bb_numrecs, ARCH_CONVERT); n++)
+                       scan_lbtree(INT_GET(ptrs[n], ARCH_CONVERT), level,
+                               scanfunc_bmap, extmapp, btype);
+               return;
+       }
+       recs = (xfs_bmbt_rec_32_t *)
+               XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+               block, 1, mp->m_bmap_dmxr[0]);
+       process_bmbt_reclist(recs, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+               extmapp);
+}
+
+/*
+ * scan_sbtree() callback for inode btree blocks.
+ *
+ * In a level-0 block every record describes a chunk of XFS_INODES_PER_CHUNK
+ * inodes.  The chunk is read on its own cursor, each inode core is converted
+ * in place with libxfs_xlate_dinode_core(), and every inode not marked free
+ * in the record is passed to process_inode().  In any other block each child
+ * pointer is followed with scan_sbtree().
+ *
+ * The cursor pushed for a chunk is popped on every path, including when the
+ * chunk cannot be read, so the cursor stack stays balanced.
+ */
+static void
+scanfunc_ino(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agf_t               *agf)
+{
+       xfs_agino_t             agino;
+       xfs_inobt_block_t       *block = (xfs_inobt_block_t *)ablock;
+       xfs_agnumber_t          seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       int                     i;
+       int                     j;
+       int                     off;
+       xfs_inobt_ptr_t         *pp;
+       xfs_inobt_rec_t         *rp;
+
+       if (level == 0) {
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+                       1, mp->m_inobt_mxr[0]);
+               for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+                       agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+                       off = XFS_INO_TO_OFFSET(mp, agino);
+                       push_cur();
+                       set_cur(&typtab[TYP_INODE],
+                               XFS_AGB_TO_DADDR(mp, seqno,
+                                                XFS_AGINO_TO_AGBNO(mp, agino)),
+                               (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+                               DB_RING_IGN, NULL);
+                       if (iocur_top->data == NULL) {
+                               dbprintf("can't read inode block %u/%u\n",
+                                       seqno, XFS_AGINO_TO_AGBNO(mp, agino));
+                               /* drop the cursor pushed for this chunk */
+                               pop_cur();
+                               continue;
+                       }
+                       for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                               xfs_dinode_t            *dip;
+                               xfs_dinode_core_t       tdic;
+
+                               dip = (xfs_dinode_t *)((char *)iocur_top->data +
+                                       ((off + j) << mp->m_sb.sb_inodelog));
+
+                               /* convert the core, then copy it back into the inode */
+                               libxfs_xlate_dinode_core( (xfs_caddr_t)
+                                       &dip->di_core, &tdic, 1, ARCH_CONVERT );
+                               memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t));
+
+                               if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT))
+                                       continue;
+                               process_inode(agf, agino + j, dip);
+                       }
+                       pop_cur();
+               }
+               return;
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+               mp->m_inobt_mxr[1]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+               scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_ino, TYP_INOBT);
+}
diff --git a/db/frag.h b/db/frag.h
new file mode 100644 (file)
index 0000000..b7986ed
--- /dev/null
+++ b/db/frag.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void    frag_init(void);        /* adds the "frag" command via add_command() (see frag.c) */
diff --git a/db/freesp.c b/db/freesp.c
new file mode 100644 (file)
index 0000000..160d236
--- /dev/null
@@ -0,0 +1,427 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include "command.h"
+#include "data.h"
+#include "freesp.h"
+#include "io.h"
+#include "type.h"
+#include "output.h"
+#include "mount.h"
+#include "malloc.h"
+
+/*
+ * One histogram bucket.  addtohist() charges an extent to the first bucket
+ * whose high bound is not below the extent length.
+ */
+typedef struct histent
+{
+       int             low;            /* lower bound, set by addhistent() */
+       int             high;           /* upper bound, set by histinit() */
+       long long       count;          /* number of extents in this bucket */
+       long long       blocks;         /* total length of those extents */
+} histent_t;
+
+static void    addhistent(int h);
+static void    addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
+                         xfs_extlen_t len);
+static int     freesp_f(int argc, char **argv);
+static void    histinit(int maxlen);
+static int     init(int argc, char **argv);
+static void    printhist(void);
+static void    scan_ag(xfs_agnumber_t agno);
+static void    scanfunc_bno(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+                            xfs_agf_t *agf);
+static void    scanfunc_cnt(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+                            xfs_agf_t *agf);
+static void    scan_freelist(xfs_agf_t *agf);
+static void    scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, typnm_t typ,
+                           int nlevels,
+                           void (*func)(xfs_btree_sblock_t *block, typnm_t typ,
+                                        int level, xfs_agf_t *agf));
+static int     usage(void);
+
+/* Per-invocation state of the freesp command; reset at the top of init(). */
+static int             agcount;        /* entries in aglist; 0 = all AGs */
+static xfs_agnumber_t  *aglist;        /* AG numbers given with -a */
+static int             countflag;      /* -c: walk the by-count btree */
+static int             dumpflag;       /* -d: print every free extent */
+static int             equalsize;      /* -e: step between bucket bounds */
+static histent_t       *hist;          /* histogram buckets */
+static int             histcount;      /* entries in hist */
+static int             multsize;       /* -b/-m: factor between bounds */
+static int             seen1;          /* a bucket with low == 1 exists */
+static int             summaryflag;    /* -s: print the totals */
+static long long       totblocks;      /* sum of all extent lengths seen */
+static long long       totexts;        /* number of extents seen */
+
+/*
+ * Command table entry for "freesp"; freesp_init() hands it to
+ * add_command().  The two strings are the argument synopsis and the
+ * one-line description.
+ * NOTE(review): the numeric fields (0, -1, 0) are presumably the minimum
+ * and maximum argument counts plus a flag word -- confirm in command.h.
+ */
+static const cmdinfo_t freesp_cmd =
+       { "freesp", NULL, freesp_f, 0, -1, 0,
+         "[-bcdfs] [-a agno]... [-e binsize] [-h h1]... [-m binmult]", 
+         "summarize free space for filesystem", NULL };
+
+/*
+ * Return 1 if allocation group agno should be scanned, 0 otherwise.
+ * An empty aglist (no -a option given) selects every allocation group.
+ */
+static int
+inaglist(
+       xfs_agnumber_t  agno)
+{
+       xfs_agnumber_t  *agp;
+
+       if (!agcount)
+               return 1;
+       for (agp = aglist; agp < aglist + agcount; agp++) {
+               if (*agp == agno)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Report on freespace usage in xfs filesystem.
+ *
+ * Parses the options in argv, scans every selected allocation group,
+ * then prints the histogram and, with -s, the summary totals.  If the
+ * options are rejected nothing is scanned.  Always returns 0.
+ */
+static int
+freesp_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_agnumber_t  agno;
+
+       if (init(argc, argv)) {
+               for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+                       if (inaglist(agno))
+                               scan_ag(agno);
+               }
+               if (histcount)
+                       printhist();
+               if (summaryflag) {
+                       dbprintf("total free extents %lld\n", totexts);
+                       dbprintf("total free blocks %lld\n", totblocks);
+                       /* no extents found: report 0 instead of 0/0 */
+                       dbprintf("average free extent size %g\n",
+                               totexts ?
+                               (double)totblocks / (double)totexts : 0.0);
+               }
+       }
+       /*
+        * init() may have allocated aglist/hist before rejecting a later
+        * option, so they are released on the failure path as well.
+        */
+       if (aglist)
+               xfree(aglist);
+       if (hist)
+               xfree(hist);
+       aglist = NULL;
+       hist = NULL;
+       return 0;
+}
+
+/*
+ * Register the "freesp" command by passing freesp_cmd to add_command().
+ */
+void
+freesp_init(void)
+{
+       add_command(&freesp_cmd);
+}
+
+/*
+ * Append the allocation group number parsed from the string a (the
+ * argument of a -a option) to aglist, growing the array by one element.
+ */
+static void
+aglistadd(
+       char    *a)
+{
+       xfs_agnumber_t  agno = (xfs_agnumber_t)atoi(a);
+       int             slot = agcount;
+
+       aglist = xrealloc(aglist, (slot + 1) * sizeof(*aglist));
+       aglist[slot] = agno;
+       agcount = slot + 1;
+}
+
+/*
+ * Parse the freesp options and build the histogram buckets.
+ *
+ * All per-invocation state is reset first, including optind so that
+ * getopt() rescans argv from the start.  Options:
+ *     -a agno         restrict the scan to this allocation group
+ *     -b              bucket bounds grow by a factor of 2
+ *     -c              walk the by-count btree instead of the by-block one
+ *     -d              print every free extent
+ *     -e binsize      bucket bounds step by binsize (must be > 0)
+ *     -h h1           explicit bucket lower bound, may be repeated
+ *     -m binmult      bucket bounds grow by binmult (must be > 1)
+ *     -s              print summary totals
+ * Only one of -b, -e, -h and -m may be used; the default is -b.
+ *
+ * Returns 1 on success.  On a bad command line the usage message is
+ * printed and 0 is returned.
+ */
+static int
+init(
+       int             argc,
+       char            **argv)
+{
+       int             c;
+       int             speced = 0;
+
+       agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
+       histcount = seen1 = summaryflag = 0;
+       totblocks = totexts = 0;
+       aglist = NULL;
+       hist = NULL;
+       while ((c = getopt(argc, argv, "a:bcde:h:m:s")) != EOF) {
+               switch (c) {
+               case 'a':
+                       aglistadd(optarg);
+                       break;
+               case 'b':
+                       if (speced)
+                               return usage();
+                       multsize = 2;
+                       speced = 1;
+                       break;
+               case 'c':
+                       countflag = 1;
+                       break;
+               case 'd':
+                       dumpflag = 1;
+                       break;
+               case 'e':
+                       if (speced)
+                               return usage();
+                       equalsize = atoi(optarg);
+                       /* histinit() steps by this; it must move forward */
+                       if (equalsize <= 0)
+                               return usage();
+                       speced = 1;
+                       break;
+               case 'h':
+                       if (speced && !histcount)
+                               return usage();
+                       addhistent(atoi(optarg));
+                       speced = 1;
+                       break;
+               case 'm':
+                       if (speced)
+                               return usage();
+                       multsize = atoi(optarg);
+                       /* a factor below 2 would never reach maxlen */
+                       if (multsize < 2)
+                               return usage();
+                       speced = 1;
+                       break;
+               case 's':
+                       summaryflag = 1;
+                       break;
+               case '?':
+                       return usage();
+               }
+       }
+       if (optind != argc)
+               return usage();
+       if (!speced)
+               multsize = 2;
+       histinit((int)mp->m_sb.sb_agblocks);
+       return 1;
+}
+
+/*
+ * Print the freesp option synopsis.  Returns 0 so that init() can write
+ * "return usage();" to report failure to its caller.
+ */
+static int
+usage(void)
+{
+       static const char       synopsis[] =
+               "freesp arguments: [-bcdfs] [-a agno] [-e binsize] [-h h1]... "
+               "[-m binmult]\n";
+
+       dbprintf("%s", synopsis);
+       return 0;
+}
+
+/*
+ * Scan one allocation group: read its AGF, account for the blocks on the
+ * AG free list, then walk either the by-count btree (-c) or the by-block
+ * btree so that every free extent reaches addtohist().
+ */
+static void
+scan_ag(
+       xfs_agnumber_t  agno)
+{
+       xfs_agf_t       *agf;
+       int             btnum;
+       typnm_t         typ;
+       void            (*scanfunc)(xfs_btree_sblock_t *, typnm_t, int,
+                                   xfs_agf_t *);
+
+       if (countflag) {
+               btnum = XFS_BTNUM_CNT;
+               typ = TYP_CNTBT;
+               scanfunc = scanfunc_cnt;
+       } else {
+               btnum = XFS_BTNUM_BNO;
+               typ = TYP_BNOBT;
+               scanfunc = scanfunc_bno;
+       }
+
+       push_cur();
+       set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agf = iocur_top->data;
+       scan_freelist(agf);
+       scan_sbtree(agf,
+               INT_GET(agf->agf_roots[btnum], ARCH_CONVERT),
+               typ,
+               INT_GET(agf->agf_levels[btnum], ARCH_CONVERT),
+               scanfunc);
+       pop_cur();
+}
+
+/*
+ * Account for every block on the allocation group's free list (AGFL) as
+ * a one-block free extent.
+ *
+ * The walk starts at index agf_flfirst and stops after index agf_fllast,
+ * wrapping to 0 at XFS_AGFL_SIZE.  Both indices come straight from disk,
+ * so they are range checked first: an out-of-range agf_flfirst would read
+ * past the AGFL array, and an out-of-range agf_fllast could never be
+ * reached, so the walk would not terminate.
+ */
+static void
+scan_freelist(
+       xfs_agf_t       *agf)
+{
+       xfs_agnumber_t  seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       xfs_agfl_t      *agfl;
+       xfs_agblock_t   bno;
+       int             i;
+
+       if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+               return;
+       if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE ||
+           INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE) {
+               dbprintf("agf %u freelist indices bad, skipping freelist "
+                        "scan\n", (unsigned int)seqno);
+               return;
+       }
+       push_cur();
+       set_cur(&typtab[TYP_AGFL],
+               XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1,
+               DB_RING_IGN, NULL);
+       agfl = iocur_top->data;
+       i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+       for (;;) {
+               bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+               addtohist(seqno, bno, 1);
+               if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+                       break;
+               if (++i == XFS_AGFL_SIZE)
+                       i = 0;
+       }
+       pop_cur();
+}
+
+/*
+ * Read the btree block at AG block number root, in the allocation group
+ * whose AGF is agf, and hand it to func.  func is called with level
+ * nlevels - 1; the scanfunc_* callbacks treat level 0 as a leaf.  The
+ * I/O cursor is pushed before the read and popped after func returns.
+ */
+static void
+scan_sbtree(
+       xfs_agf_t       *agf,
+       xfs_agblock_t   root,
+       typnm_t         typ,
+       int             nlevels,
+       void            (*func)(xfs_btree_sblock_t      *block,
+                               typnm_t                 typ,
+                               int                     level,
+                               xfs_agf_t               *agf))
+{
+       xfs_agnumber_t          agno;
+       xfs_btree_sblock_t      *block;
+
+       push_cur();
+       agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+       set_cur(&typtab[typ], XFS_AGB_TO_DADDR(mp, agno, root), blkbb,
+               DB_RING_IGN, NULL);
+       block = (xfs_btree_sblock_t *)iocur_top->data;
+       func(block, typ, nlevels - 1, agf);
+       pop_cur();
+}
+
+/*
+ * Visit one block of the by-block-number free space btree.
+ *
+ * At level 0 every record in the block is passed to addtohist() as a
+ * free extent.  At higher levels every child pointer is followed through
+ * scan_sbtree().  Child pointers are on-disk values, so they go through
+ * INT_GET like every other on-disk field read here.
+ */
+/*ARGSUSED*/
+static void
+scanfunc_bno(
+       xfs_btree_sblock_t      *ablock,
+       typnm_t                 typ,
+       int                     level,
+       xfs_agf_t               *agf)
+{
+       xfs_alloc_block_t       *block = (xfs_alloc_block_t *)ablock;
+       int                     i;
+       xfs_alloc_ptr_t         *pp;
+       xfs_alloc_rec_t         *rp;
+
+       if (level == 0) {
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+                       1, mp->m_alloc_mxr[0]);
+               for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+                       addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+                               INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+               return;
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+               mp->m_alloc_mxr[1]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+               scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level,
+                       scanfunc_bno);
+}
+
+/*
+ * Visit one block of the by-count free space btree.
+ *
+ * At level 0 every record in the block is passed to addtohist() as a
+ * free extent.  At higher levels every child pointer is followed through
+ * scan_sbtree().  Child pointers are on-disk values, so they go through
+ * INT_GET like every other on-disk field read here.
+ */
+/*ARGSUSED*/
+static void
+scanfunc_cnt(
+       xfs_btree_sblock_t      *ablock,
+       typnm_t                 typ,
+       int                     level,
+       xfs_agf_t               *agf)
+{
+       xfs_alloc_block_t       *block = (xfs_alloc_block_t *)ablock;
+       int                     i;
+       xfs_alloc_ptr_t         *pp;
+       xfs_alloc_rec_t         *rp;
+
+       if (level == 0) {
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+                       1, mp->m_alloc_mxr[0]);
+               for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+                       addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+                               INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+               return;
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+               mp->m_alloc_mxr[1]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+               scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level,
+                       scanfunc_cnt);
+}
+
+/*
+ * Append a histogram bucket whose lower bound is h (0 is treated as 1).
+ * The bucket's counters start at zero; its upper bound is left for
+ * histinit() to fill in.  seen1 records that a bucket starting at 1
+ * has been added.
+ */
+static void
+addhistent(
+       int     h)
+{
+       histent_t       *ent;
+
+       if (h == 0)
+               h = 1;
+       hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
+       ent = &hist[histcount++];
+       ent->low = h;
+       ent->blocks = 0;
+       ent->count = 0;
+       if (h == 1)
+               seen1 = 1;
+}
+
+/*
+ * Record one free extent of len blocks starting at agbno in allocation
+ * group agno.  With -d the extent is printed.  The totals are updated,
+ * and the extent is charged to the first bucket whose upper bound is at
+ * least len; if no bucket qualifies only the totals change.
+ */
+static void
+addtohist(
+       xfs_agnumber_t  agno,
+       xfs_agblock_t   agbno,
+       xfs_extlen_t    len)
+{
+       histent_t       *hp;
+       int             i;
+
+       if (dumpflag)
+               dbprintf("%8d %8d %8d\n", agno, agbno, len);
+       totblocks += len;
+       totexts++;
+       for (i = 0; i < histcount; i++) {
+               hp = &hist[i];
+               if (hp->high < len)
+                       continue;
+               hp->count++;
+               hp->blocks += len;
+               return;
+       }
+}
+
+/*
+ * qsort() comparison function: orders histogram buckets by ascending
+ * lower bound.  The result is built from comparisons rather than a
+ * subtraction, which could overflow for bounds of opposite sign.
+ */
+static int
+hcmp(
+       const void      *a,
+       const void      *b)
+{
+       int     alow = ((const histent_t *)a)->low;
+       int     blow = ((const histent_t *)b)->low;
+
+       return (alow > blow) - (alow < blow);
+}
+
+/*
+ * Finish building the histogram once the options have been parsed.
+ *
+ * With a positive equalsize the bucket lower bounds are 1, 1 + equalsize,
+ * ...; with a multsize above 1 they are 1, multsize, multsize^2, ...; in
+ * both cases only bounds below maxlen are generated.  Otherwise the
+ * buckets added so far (-h) are used, a bucket starting at 1 is added if
+ * there is none, and the buckets are sorted by lower bound.
+ *
+ * Each bucket's upper bound is then set to one less than the next
+ * bucket's lower bound; the last bucket's upper bound is maxlen.
+ *
+ * A step that cannot advance (equalsize <= 0, multsize <= 1) is not used
+ * for generation, since the loop would never end.  Both loops also stop
+ * before the next bound is computed once it would reach maxlen, so the
+ * loop variable cannot overflow.
+ */
+static void
+histinit(
+       int     maxlen)
+{
+       int     i;
+
+       if (equalsize > 0) {
+               for (i = 1; i < maxlen; i += equalsize) {
+                       addhistent(i);
+                       /* i + equalsize >= maxlen: this was the last bound */
+                       if (i >= maxlen - equalsize)
+                               break;
+               }
+       } else if (multsize > 1) {
+               for (i = 1; i < maxlen; i *= multsize) {
+                       addhistent(i);
+                       /* i * multsize > maxlen: this was the last bound */
+                       if (i > maxlen / multsize)
+                               break;
+               }
+       } else {
+               if (!seen1)
+                       addhistent(1);
+               qsort(hist, histcount, sizeof(*hist), hcmp);
+       }
+       for (i = 0; i < histcount; i++) {
+               if (i < histcount - 1)
+                       hist[i].high = hist[i + 1].low - 1;
+               else
+                       hist[i].high = maxlen;
+       }
+}
+
+/*
+ * Print the histogram: a header line, then one line for each bucket that
+ * received at least one extent, giving its bounds, extent count, block
+ * count and share of all free blocks seen.
+ */
+static void
+printhist(void)
+{
+       histent_t       *hp;
+       int             i;
+
+       dbprintf("%7s %7s %7s %7s %6s\n",
+               "from", "to", "extents", "blocks", "pct");
+       for (i = 0; i < histcount; i++) {
+               hp = &hist[i];
+               if (hp->count == 0)
+                       continue;
+               dbprintf("%7d %7d %7lld %7lld %6.2f\n", hp->low, hp->high,
+                       hp->count, hp->blocks,
+                       hp->blocks * 100.0 / totblocks);
+       }
+}
diff --git a/db/freesp.h b/db/freesp.h
new file mode 100644 (file)
index 0000000..15ea505
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Registers the "freesp" command via add_command(). */
+extern void    freesp_init(void);
diff --git a/db/hash.c b/db/hash.c
new file mode 100644 (file)
index 0000000..e11851b
--- /dev/null
+++ b/db/hash.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "addr.h"
+#include "command.h"
+#include "type.h"
+#include "io.h"
+#include "output.h"
+
+static int hash_f(int argc, char **argv);
+static void hash_help(void);
+
+/*
+ * Command table entry for "hash"; hash_init() hands it to add_command().
+ * NOTE(review): the fields "1, 1" are presumably the minimum and maximum
+ * argument counts; hash_f() reads argv[1] unconditionally, so it depends
+ * on exactly one argument being enforced -- confirm in command.h.
+ */
+static const cmdinfo_t hash_cmd =
+       { "hash", NULL, hash_f, 1, 1, 0, "string",
+         "calculate hash value", hash_help };
+
+/*
+ * Print the extended help text for the "hash" command.
+ */
+static void
+hash_help(void)
+{
+       dbprintf(
+"\n"
+" 'hash' prints out the calculated hash value for a string using the\n"
+"directory/attribute code hash function.\n"
+"\n"
+" Usage:  \"hash <string>\"\n"
+"\n"
+);
+
+}
+
+/*
+ * "hash" command: print, in hexadecimal, the value libxfs_da_hashname()
+ * computes for the string argv[1].  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+hash_f(
+       int             argc,
+       char            **argv)
+{
+       char            *name = argv[1];
+       int             namelen = (int)strlen(name);
+       xfs_dahash_t    hash = libxfs_da_hashname(name, namelen);
+
+       dbprintf("0x%x\n", hash);
+       return 0;
+}
+
+/*
+ * Register the "hash" command by passing hash_cmd to add_command().
+ */
+void
+hash_init(void)
+{
+       add_command(&hash_cmd);
+}
diff --git a/db/hash.h b/db/hash.h
new file mode 100644 (file)
index 0000000..05c8075
--- /dev/null
+++ b/db/hash.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Registers the "hash" command via add_command(). */
+extern void            hash_init(void);
+/*
+ * NOTE(review): hash.c in this commit calls libxfs_da_hashname(), not
+ * xfs_da_hashname() -- confirm that this prototype is still needed.
+ */
+extern xfs_dahash_t    xfs_da_hashname(char *name, int namelen);
diff --git a/db/help.c b/db/help.c
new file mode 100644 (file)
index 0000000..56dd8b8
--- /dev/null
+++ b/db/help.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "help.h"
+#include "output.h"
+
+static void    help_all(void);
+static void    help_onecmd(const char *cmd, const cmdinfo_t *ct);
+static int     help_f(int argc, char **argv);
+static void    help_oneline(const char *cmd, const cmdinfo_t *ct);
+
+/*
+ * Command table entry for "help" (alternate name "?"); help_init() hands
+ * it to add_command().
+ * NOTE(review): the fields "0, 1" are presumably the minimum and maximum
+ * argument counts -- confirm in command.h.
+ */
+static const cmdinfo_t help_cmd =
+       { "help", "?", help_f, 0, 1, 0, "[command]",
+         "help for one or all commands", NULL };
+
+/*
+ * Print the one-line summary of every command in cmdtab, followed by a
+ * hint on how to get extended help.
+ */
+static void
+help_all(void)
+{
+       int     i;
+
+       for (i = 0; i < ncmds; i++)
+               help_oneline(cmdtab[i].name, &cmdtab[i]);
+       dbprintf("\nUse 'help commandname' for extended help.\n");
+}
+
+/*
+ * "help" command.  Without an argument the summary of all commands is
+ * printed; with one, the help for the command named argv[1], or a
+ * "not found" message if find_command() does not know it.
+ * Always returns 0.
+ */
+static int
+help_f(
+       int             argc,
+       char            **argv)
+{
+       const cmdinfo_t *ct;
+
+       if (argc == 1)
+               help_all();
+       else if ((ct = find_command(argv[1])) != NULL)
+               help_onecmd(argv[1], ct);
+       else
+               dbprintf("command %s not found\n", argv[1]);
+       return 0;
+}
+
+/*
+ * Register the "help" command by passing help_cmd to add_command().
+ */
+void
+help_init(void)
+{
+       add_command(&help_cmd);
+}
+
+/*
+ * Print the one-line summary for ct under the name cmd, followed by the
+ * command's extended help if it provides a help function.
+ */
+static void
+help_onecmd(
+       const char      *cmd,
+       const cmdinfo_t *ct)
+{
+       help_oneline(cmd, ct);
+       if (ct->help == NULL)
+               return;
+       ct->help();
+}
+
+/*
+ * Print a single summary line for command ct: its name, its argument
+ * synopsis if it has one, and its one-line description.  If cmd is given
+ * it is printed as the name; otherwise ct->name is printed, followed by
+ * the alternate name if there is one.
+ */
+static void
+help_oneline(
+       const char      *cmd,
+       const cmdinfo_t *ct)
+{
+       if (cmd == NULL) {
+               dbprintf("%s ", ct->name);
+               if (ct->altname != NULL)
+                       dbprintf("(or %s) ", ct->altname);
+       } else {
+               dbprintf("%s ", cmd);
+       }
+       if (ct->args != NULL)
+               dbprintf("%s ", ct->args);
+       dbprintf("-- %s\n", ct->oneline);
+}
+
diff --git a/db/help.h b/db/help.h
new file mode 100644 (file)
index 0000000..4af57d2
--- /dev/null
+++ b/db/help.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Registers the "help" command via add_command(). */
+extern void    help_init(void);
diff --git a/db/init.c b/db/init.c
new file mode 100644 (file)
index 0000000..e4233df
--- /dev/null
+++ b/db/init.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <getopt.h>
+#include <signal.h>
+#include "command.h"
+#include "data.h"
+#include "init.h"
+#include "input.h"
+#include "io.h"
+#include "mount.h"
+#include "sig.h"
+#include "output.h"
+
+/* Device or file name taken from the command line; set by init(). */
+char   *fsdevice;
+
+/*
+ * Print the command line synopsis and exit with status 1.
+ * The flag list includes -i, which init() accepts.
+ */
+static void
+usage(void)
+{
+       dbprintf("Usage: %s [-c cmd]... [-p prog] [-l logdev] [-firxV] "
+                "devname\n", progname);
+       exit(1);
+}
+
+/*
+ * Report a failed write to the temporary command file and exit with
+ * status 1.  what names the failing call for perror().
+ */
+static void
+cfile_write_failed(
+       const char      *what)
+{
+       perror(what);
+       dbprintf("%s: error writing temporary file\n", progname);
+       exit(1);
+}
+
+/*
+ * Process the command line, initialize libxfs and mount the filesystem.
+ *
+ * Options:
+ *     -c cmd          command to run; all -c commands are written to a
+ *                     temporary file, one per line and followed by "q",
+ *                     which is then pushed as input with pushfile()
+ *     -f              the name is passed to libxfs as dname rather than
+ *                     as volname
+ *     -i              sets LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE
+ *     -l logdev       log device name
+ *     -p prog         program name to use in messages
+ *     -r              sets LIBXFS_ISREADONLY
+ *     -x              sets flag_expert_mode
+ *     -V              print the version and exit with status 0
+ * Exactly one device name must follow the options.
+ *
+ * Any failure (bad usage, temporary file error, libxfs or mount failure)
+ * terminates the program with status 1.
+ */
+void
+init(
+       int             argc,
+       char            **argv)
+{
+       int             c;
+       FILE            *cfile = NULL;
+
+       progname = basename(argv[0]);
+       while ((c = getopt(argc, argv, "c:fip:rxVl:")) != EOF) {
+               switch (c) {
+               case 'c':
+                       if (!cfile)
+                               cfile = tmpfile();
+                       if (!cfile) {
+                               perror("tmpfile");
+                               exit(1);
+                       }
+                       if (fprintf(cfile, "%s\n", optarg) < 0)
+                               cfile_write_failed("fprintf(tmpfile)");
+                       break;
+               case 'f':
+                       xfsargs.disfile = 1;
+                       break;
+               case 'i':
+                       xfsargs.isreadonly =
+                               (LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE);
+                       flag_readonly = 1;
+                       break;
+               case 'p':
+                       progname = optarg;
+                       break;
+               case 'r':
+                       xfsargs.isreadonly = LIBXFS_ISREADONLY;
+                       flag_readonly = 1;
+                       break;
+               case 'l':
+                       xfsargs.logname = optarg;
+                       break;
+               case 'x':
+                       flag_expert_mode = 1;
+                       break;
+               case 'V':
+                       /* a version query is complete once answered */
+                       printf("%s version %s\n", progname, VERSION);
+                       exit(0);
+                       /*NOTREACHED*/
+               case '?':
+                       usage();
+                       /*NOTREACHED*/
+               }
+       }
+       if (optind + 1 != argc) {
+               usage();
+               /*NOTREACHED*/
+       }
+       fsdevice = argv[optind];
+       if (!xfsargs.disfile)
+               xfsargs.volname = fsdevice;
+       else
+               xfsargs.dname = fsdevice;
+       xfsargs.notvolok = 1;
+       if (!libxfs_init(&xfsargs)) {
+               fputs("\nfatal error -- couldn't initialize XFS library\n",
+                       stderr);
+               exit(1);
+       }
+       mp = dbmount();
+       if (mp == NULL) {
+               dbprintf("%s: %s is not a valid filesystem\n",
+                       progname, fsdevice);
+               exit(1);
+               /*NOTREACHED*/
+       }
+       blkbb = 1 << mp->m_blkbb_log;
+       push_cur();
+       init_commands();
+       init_sig();
+       if (cfile) {
+               if (fprintf(cfile, "q\n") < 0)
+                       cfile_write_failed("fprintf(tmpfile)");
+               if (fflush(cfile) != 0)
+                       cfile_write_failed("fflush(tmpfile)");
+               rewind(cfile);
+               pushfile(cfile);
+       }
+}
diff --git a/db/init.h b/db/init.h
new file mode 100644 (file)
index 0000000..5cbbda0
--- /dev/null
+++ b/db/init.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Device or file name taken from the command line; set by init(). */
+extern char    *fsdevice;
+/* Parses the command line and mounts the filesystem; exits on failure. */
+extern void    init(int argc, char **argv);
diff --git a/db/inobt.c b/db/inobt.c
new file mode 100644 (file)
index 0000000..4b43d94
--- /dev/null
+++ b/db/inobt.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inobt.h"
+#include "print.h"
+#include "bit.h"
+#include "mount.h"
+
+static int     inobt_key_count(void *obj, int startoff);
+static int     inobt_key_offset(void *obj, int startoff, int idx);
+static int     inobt_ptr_count(void *obj, int startoff);
+static int     inobt_ptr_offset(void *obj, int startoff, int idx);
+static int     inobt_rec_count(void *obj, int startoff);
+static int     inobt_rec_offset(void *obj, int startoff, int idx);
+
+/*
+ * Single unnamed entry of type FLDT_INOBT at offset 0, followed by the
+ * NULL terminator.
+ */
+const field_t  inobt_hfld[] = {
+       { "", FLDT_INOBT, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/* Bit offset of header member bb_<f> within xfs_inobt_block_t. */
+#define        OFF(f)  bitize(offsetof(xfs_inobt_block_t, bb_ ## f))
+/*
+ * Field table for an inode btree block: the fixed header members, then
+ * the "recs", "keys" and "ptrs" arrays whose offsets and counts are
+ * computed at run time by the inobt_*_offset / inobt_*_count callbacks
+ * in this file.  The table is NULL-terminated.
+ */
+const field_t  inobt_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_INOBT },
+       { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_INOBT },
+       { "recs", FLDT_INOBTREC, inobt_rec_offset, inobt_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_INOBTKEY, inobt_key_offset, inobt_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_INOBTPTR, inobt_ptr_offset, inobt_ptr_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_INOBT },
+       { NULL }
+};
+
+/* Bit offset of member ir_<f> within xfs_inobt_key_t. */
+#define        KOFF(f) bitize(offsetof(xfs_inobt_key_t, ir_ ## f))
+/* Field table for one inode btree key: just "startino". */
+const field_t  inobt_key_flds[] = {
+       { "startino", FLDT_AGINO, OI(KOFF(startino)), C1, 0, TYP_INODE },
+       { NULL }
+};
+
+/* Bit offset of member ir_<f> within xfs_inobt_rec_t. */
+#define        ROFF(f) bitize(offsetof(xfs_inobt_rec_t, ir_ ## f))
+/* Field table for one inode btree record: startino, freecount, free. */
+const field_t  inobt_rec_flds[] = {
+       { "startino", FLDT_AGINO, OI(ROFF(startino)), C1, 0, TYP_INODE },
+       { "freecount", FLDT_INT32D, OI(ROFF(freecount)), C1, 0, TYP_NONE },
+       { "free", FLDT_INOFREE, OI(ROFF(free)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Count callback for the "keys" array of an inode btree block.
+ * Yields 0 for a level-0 block, otherwise the block's bb_numrecs.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_key_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_inobt_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) != 0 ?
+               INT_GET(bp->bb_numrecs, ARCH_CONVERT) : 0;
+}
+
+/*
+ * Offset callback for the "keys" array: returns the bit offset, relative
+ * to the start of the block, of key number idx.  Only valid on blocks
+ * whose bb_level is greater than 0; startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_key_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_inobt_block_t       *bp = obj;
+       char                    *keyaddr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       keyaddr = (char *)XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_inobt,
+               bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0));
+       return bitize((int)(keyaddr - (char *)bp));
+}
+
+/*
+ * Count callback for the "ptrs" array of an inode btree block.
+ * Yields 0 for a level-0 block, otherwise the block's bb_numrecs.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_ptr_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_inobt_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) != 0 ?
+               INT_GET(bp->bb_numrecs, ARCH_CONVERT) : 0;
+}
+
+/*
+ * Offset callback for the "ptrs" array: returns the bit offset, relative
+ * to the start of the block, of pointer number idx.  Only valid on blocks
+ * whose bb_level is greater than 0; startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_ptr_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_inobt_block_t       *bp = obj;
+       char                    *ptraddr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) > 0);
+       ptraddr = (char *)XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt,
+               bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0));
+       return bitize((int)(ptraddr - (char *)bp));
+}
+
+/*
+ * Count callback for the "recs" array of an inode btree block.
+ * Yields 0 when bb_level is greater than 0, otherwise bb_numrecs.
+ * startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_rec_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_inobt_block_t       *bp = obj;
+
+       ASSERT(startoff == 0);
+       return INT_GET(bp->bb_level, ARCH_CONVERT) > 0 ?
+               0 : INT_GET(bp->bb_numrecs, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for the "recs" array: returns the bit offset, relative
+ * to the start of the block, of record number idx.  Only valid on blocks
+ * whose bb_level is 0; startoff must be 0.
+ */
+/*ARGSUSED*/
+static int
+inobt_rec_offset(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_inobt_block_t       *bp = obj;
+       char                    *recaddr;
+
+       ASSERT(startoff == 0);
+       ASSERT(INT_GET(bp->bb_level, ARCH_CONVERT) == 0);
+       recaddr = (char *)XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt,
+               bp, idx,
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1));
+       return bitize((int)(recaddr - (char *)bp));
+}
+
+/*
+ * Size callback for an inode btree block: the filesystem block size
+ * (mp->m_sb.sb_blocksize) expressed in bits.  All arguments are unused.
+ */
+/*ARGSUSED*/
+int
+inobt_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       int     nbits = bitize(mp->m_sb.sb_blocksize);
+
+       return nbits;
+}
diff --git a/db/inobt.h b/db/inobt.h
new file mode 100644 (file)
index 0000000..23aeb7e
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Forward declaration so this header does not need field.h. */
+struct field;
+
+/* Field tables defined in inobt.c. */
+extern const struct field      inobt_flds[];
+extern const struct field      inobt_hfld[];
+extern const struct field      inobt_key_flds[];
+extern const struct field      inobt_rec_flds[];
+
+/* Returns the filesystem block size in bits; the arguments are unused. */
+extern int     inobt_size(void *obj, int startoff, int idx);
diff --git a/db/inode.c b/db/inode.c
new file mode 100644 (file)
index 0000000..a12e32f
--- /dev/null
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "print.h"
+#include "block.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int     inode_a_bmbt_count(void *obj, int startoff);
+static int     inode_a_bmx_count(void *obj, int startoff);
+static int     inode_a_count(void *obj, int startoff);
+static int     inode_a_offset(void *obj, int startoff, int idx);
+static int     inode_a_sfattr_count(void *obj, int startoff);
+static int     inode_core_nlinkv2_count(void *obj, int startoff);
+static int     inode_core_onlink_count(void *obj, int startoff);
+static int     inode_core_projid_count(void *obj, int startoff);
+static int     inode_core_nlinkv1_count(void *obj, int startoff);
+static int     inode_f(int argc, char **argv);
+static int     inode_u_bmbt_count(void *obj, int startoff);
+static int     inode_u_bmx_count(void *obj, int startoff);
+static int     inode_u_c_count(void *obj, int startoff);
+static int     inode_u_dev_count(void *obj, int startoff);
+static int     inode_u_muuid_count(void *obj, int startoff);
+static int     inode_u_sfdir_count(void *obj, int startoff);
+static int     inode_u_sfdir2_count(void *obj, int startoff);
+static int     inode_u_symlink_count(void *obj, int startoff);
+
+/*
+ * Command descriptor for "inode", handled by inode_f() and registered
+ * by inode_init().
+ * NOTE(review): the numeric members are presumably argument-count limits
+ * and a flag -- confirm against cmdinfo_t in command.h.
+ */
+static const cmdinfo_t inode_cmd =
+       { "inode", NULL, inode_f, 0, 1, 1, "[inode#]",
+         "set current inode", NULL };
+
+/*
+ * Single unnamed entry of type FLDT_INODE at offset 0, followed by the
+ * NULL terminator.
+ */
+const field_t  inode_hfld[] = {
+       { "", FLDT_INODE, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/* Bit offset of member di_<f> within xfs_dinode_t. */
+#define        OFF(f)  bitize(offsetof(xfs_dinode_t, di_ ## f))
+/*
+ * Top-level field table for an on-disk inode: the core, the
+ * next_unlinked link, the "u" area and the "a" area.  The "a" entry has
+ * its offset and count computed at run time by inode_a_offset() and
+ * inode_a_count().
+ */
+const field_t  inode_flds[] = {
+       { "core", FLDT_DINODE_CORE, OI(OFF(core)), C1, 0, TYP_NONE },
+       { "next_unlinked", FLDT_AGINO, OI(OFF(next_unlinked)), C1, 0,
+         TYP_INODE },
+       { "u", FLDT_DINODE_U, OI(OFF(u)), C1, 0, TYP_NONE },
+       { "a", FLDT_DINODE_A, inode_a_offset, inode_a_count,
+         FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { NULL }
+};
+
+/* Bit offset of member di_<f> within xfs_dinode_core_t. */
+#define        COFF(f) bitize(offsetof(xfs_dinode_core_t, di_ ## f))
+/*
+ * Field table for the inode core.
+ *
+ * "nlinkv1" and "onlink" both map onto di_onlink; which of the
+ * nlinkv1 / nlinkv2 / onlink / projid entries has a non-zero count is
+ * decided from di_version by the inode_core_*_count callbacks.
+ * "newrtbm", "prealloc" and "realtime" are single-bit fields located
+ * inside the 16-bit "flags" word.
+ */
+const field_t  inode_core_flds[] = {
+       { "magic", FLDT_UINT16X, OI(COFF(magic)), C1, 0, TYP_NONE },
+       { "mode", FLDT_UINT16O, OI(COFF(mode)), C1, 0, TYP_NONE },
+       { "version", FLDT_INT8D, OI(COFF(version)), C1, 0, TYP_NONE },
+       { "format", FLDT_DINODE_FMT, OI(COFF(format)), C1, 0, TYP_NONE },
+       { "nlinkv1", FLDT_UINT16D, OI(COFF(onlink)), inode_core_nlinkv1_count,
+         FLD_COUNT, TYP_NONE },
+       { "nlinkv2", FLDT_UINT32D, OI(COFF(nlink)), inode_core_nlinkv2_count,
+         FLD_COUNT, TYP_NONE },
+       { "onlink", FLDT_UINT16D, OI(COFF(onlink)), inode_core_onlink_count,
+         FLD_COUNT, TYP_NONE },
+       { "projid", FLDT_UINT16D, OI(COFF(projid)), inode_core_projid_count,
+         FLD_COUNT, TYP_NONE },
+       { "uid", FLDT_UINT32D, OI(COFF(uid)), C1, 0, TYP_NONE },
+       { "gid", FLDT_UINT32D, OI(COFF(gid)), C1, 0, TYP_NONE },
+       { "atime", FLDT_TIMESTAMP, OI(COFF(atime)), C1, 0, TYP_NONE },
+       { "mtime", FLDT_TIMESTAMP, OI(COFF(mtime)), C1, 0, TYP_NONE },
+       { "ctime", FLDT_TIMESTAMP, OI(COFF(ctime)), C1, 0, TYP_NONE },
+       { "size", FLDT_FSIZE, OI(COFF(size)), C1, 0, TYP_NONE },
+       { "nblocks", FLDT_DRFSBNO, OI(COFF(nblocks)), C1, 0, TYP_NONE },
+       { "extsize", FLDT_EXTLEN, OI(COFF(extsize)), C1, 0, TYP_NONE },
+       { "nextents", FLDT_EXTNUM, OI(COFF(nextents)), C1, 0, TYP_NONE },
+       { "naextents", FLDT_AEXTNUM, OI(COFF(anextents)), C1, 0, TYP_NONE },
+       { "forkoff", FLDT_UINT8D, OI(COFF(forkoff)), C1, 0, TYP_NONE },
+       { "aformat", FLDT_DINODE_FMT, OI(COFF(aformat)), C1, 0, TYP_NONE },
+       { "dmevmask", FLDT_UINT32X, OI(COFF(dmevmask)), C1, 0, TYP_NONE },
+       { "dmstate", FLDT_UINT16D, OI(COFF(dmstate)), C1, 0, TYP_NONE },
+       { "flags", FLDT_UINT16X, OI(COFF(flags)), C1, FLD_SKIPALL, TYP_NONE },
+       /* bit position = start of flags + 16 - <flag bit number> - 1 */
+       { "newrtbm", FLDT_UINT1,
+         OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_NEWRTBM_BIT - 1), C1,
+         0, TYP_NONE },
+       { "prealloc", FLDT_UINT1,
+         OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_PREALLOC_BIT - 1), C1,
+         0, TYP_NONE },
+       { "realtime", FLDT_UINT1,
+         OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_REALTIME_BIT - 1), C1,
+         0, TYP_NONE },
+       { "gen", FLDT_UINT32D, OI(COFF(gen)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/* Bit offset of member t_<f> within xfs_timestamp_t. */
+#define        TOFF(f) bitize(offsetof(xfs_timestamp_t, t_ ## f))
+/* Field table for an inode timestamp: "sec" and "nsec". */
+const field_t  timestamp_flds[] = {
+       { "sec", FLDT_TIME, OI(TOFF(sec)), C1, 0, TYP_NONE },
+       { "nsec", FLDT_NSEC, OI(TOFF(nsec)), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field table for the "u" area of an inode.  Every alternative starts at
+ * offset 0; each entry's count callback (inode_u_*_count) inspects
+ * di_format, and for some entries di_mode and the directory version, to
+ * decide whether that alternative is present.
+ */
+const field_t  inode_u_flds[] = {
+       { "bmbt", FLDT_BMROOTD, 0, inode_u_bmbt_count, FLD_COUNT, TYP_NONE },
+       { "bmx", FLDT_BMAPBTDREC, 0, inode_u_bmx_count, FLD_ARRAY|FLD_COUNT,
+         TYP_NONE },
+       { "c", FLDT_CHARNS, 0, inode_u_c_count, FLD_COUNT, TYP_NONE },
+       { "dev", FLDT_DEV, 0, inode_u_dev_count, FLD_COUNT, TYP_NONE },
+       { "muuid", FLDT_UUID, 0, inode_u_muuid_count, FLD_COUNT, TYP_NONE },
+       { "sfdir", FLDT_DIRSHORT, 0, inode_u_sfdir_count, FLD_COUNT, TYP_NONE },
+       { "sfdir2", FLDT_DIR2SF, 0, inode_u_sfdir2_count, FLD_COUNT, TYP_NONE },
+       { "symlink", FLDT_CHARNS, 0, inode_u_symlink_count, FLD_COUNT,
+         TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field table for the "a" area of an inode.  Every alternative starts at
+ * offset 0; each entry's count callback (inode_a_*_count) inspects
+ * di_aformat to decide whether that alternative is present.
+ */
+const field_t  inode_a_flds[] = {
+       { "bmbt", FLDT_BMROOTA, 0, inode_a_bmbt_count, FLD_COUNT, TYP_NONE },
+       { "bmx", FLDT_BMAPBTAREC, 0, inode_a_bmx_count, FLD_ARRAY|FLD_COUNT,
+         TYP_NONE },
+       { "sfattr", FLDT_ATTRSHORT, 0, inode_a_sfattr_count, FLD_COUNT,
+         TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Printable names indexed by the numeric inode format value; used by
+ * fp_dinode_fmt(), which prints the bare number for values outside
+ * this table.
+ */
+static const char      *dinode_fmt_name[] =
+       { "dev", "local", "extents", "btree", "uuid" };
+/* Number of entries in dinode_fmt_name[]. */
+static const int       dinode_fmt_name_size =
+       sizeof(dinode_fmt_name) / sizeof(dinode_fmt_name[0]);
+
+/*
+ * Print "count" inode-format values, each "size" bits wide, starting at
+ * bit offset "bit" within obj.  A value that indexes dinode_fmt_name[]
+ * is printed as "<n> (<name>)", any other value as just "<n>".  When
+ * "array" is non-zero every value is prefixed with its index plus
+ * "base" and a colon.  Values are separated by a single space.
+ *
+ * fmtstr and arg are not used.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_dinode_fmt(
+       void                    *obj,
+       int                     bit,
+       int                     count,
+       char                    *fmtstr,
+       int                     size,
+       int                     arg,
+       int                     base,
+       int                     array)
+{
+       xfs_dinode_fmt_t        fmt;
+       int                     idx;
+       int                     pos = bit;
+
+       for (idx = 0; idx < count; idx++) {
+               fmt = (xfs_dinode_fmt_t)getbitval(obj, pos, size, BVSIGNED);
+               pos += size;
+               if (array)
+                       dbprintf("%d:", idx + base);
+               if (fmt >= 0 && fmt < dinode_fmt_name_size)
+                       dbprintf("%d (%s)", (int)fmt, dinode_fmt_name[(int)fmt]);
+               else
+                       dbprintf("%d", (int)fmt);
+               /* separator between values, none after the last */
+               if (idx != count - 1)
+                       dbprintf(" ");
+       }
+       return 1;
+}
+
+/*
+ * Count callback for "a.bmbt": 1 when XFS_DFORK_Q_ARCH is true for the
+ * inode and di_aformat is XFS_DINODE_FMT_BTREE, otherwise 0.
+ * obj must be the current buffer (iocur_top->data).
+ */
+static int
+inode_a_bmbt_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       if (XFS_DFORK_Q_ARCH(ip, ARCH_CONVERT)) {
+               ASSERT((char *)XFS_DFORK_APTR_ARCH(ip, ARCH_CONVERT) - (char *)ip == byteize(startoff));
+               return INT_GET(ip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE;
+       }
+       return 0;
+}
+
+/*
+ * Count callback for "a.bmx": di_anextents when XFS_DFORK_Q_ARCH is true
+ * for the inode and di_aformat is XFS_DINODE_FMT_EXTENTS, otherwise 0.
+ * obj must be the current buffer (iocur_top->data).
+ */
+static int
+inode_a_bmx_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       if (!XFS_DFORK_Q_ARCH(ip, ARCH_CONVERT))
+               return 0;
+       ASSERT((char *)XFS_DFORK_APTR_ARCH(ip, ARCH_CONVERT) - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_aformat, ARCH_CONVERT) != XFS_DINODE_FMT_EXTENTS)
+               return 0;
+       return INT_GET(ip->di_core.di_anextents, ARCH_CONVERT);
+}
+
+/*
+ * Count callback for the inode's "a" field: the value of
+ * XFS_DFORK_Q_ARCH for the inode.  startoff must be 0.
+ */
+static int
+inode_a_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(startoff == 0);
+       return XFS_DFORK_Q_ARCH(ip, ARCH_CONVERT);
+}
+
+/*
+ * Offset callback for the inode's "a" field: bit offset from the start
+ * of the inode to the address given by XFS_DFORK_APTR_ARCH.  Asserts
+ * that XFS_DFORK_Q_ARCH is true; startoff and idx must be 0.
+ */
+static int
+inode_a_offset(
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+       xfs_dinode_t    *ip = obj;
+       char            *afork;
+
+       ASSERT(startoff == 0);
+       ASSERT(idx == 0);
+       ASSERT(XFS_DFORK_Q_ARCH(ip, ARCH_CONVERT));
+       afork = (char *)XFS_DFORK_APTR_ARCH(ip, ARCH_CONVERT);
+       return bitize((int)(afork - (char *)ip));
+}
+
+/*
+ * Count callback for "a.sfattr": 1 when XFS_DFORK_Q_ARCH is true for the
+ * inode and di_aformat is XFS_DINODE_FMT_LOCAL, otherwise 0.
+ * obj must be the current buffer (iocur_top->data).
+ */
+static int
+inode_a_sfattr_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       if (XFS_DFORK_Q_ARCH(ip, ARCH_CONVERT)) {
+               ASSERT((char *)XFS_DFORK_APTR_ARCH(ip, ARCH_CONVERT) - (char *)ip == byteize(startoff));
+               return INT_GET(ip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL;
+       }
+       return 0;
+}
+
+/*
+ * Size, in bits, of the inode's "a" area, selected by di_aformat:
+ *   LOCAL   - hdr.totsize of the short-form attribute header
+ *   EXTENTS - di_anextents extent records
+ *   BTREE   - XFS_DFORK_ASIZE_ARCH for the inode
+ *   other   - 0
+ * startoff and idx must be 0.
+ */
+int
+inode_a_size(
+       void                    *obj,
+       int                     startoff,
+       int                     idx)
+{
+       xfs_attr_shortform_t    *asf;
+       xfs_dinode_t            *dip;
+
+       ASSERT(startoff == 0);
+       ASSERT(idx == 0);
+       dip = obj;
+       switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+       case XFS_DINODE_FMT_LOCAL:
+               asf = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+               /*
+                * totsize is a multi-byte on-disk field, so it goes through
+                * INT_GET like every other on-disk field read in this file.
+                */
+               return bitize((int)INT_GET(asf->hdr.totsize, ARCH_CONVERT));
+       case XFS_DINODE_FMT_EXTENTS:
+               return (int)(INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+       case XFS_DINODE_FMT_BTREE:
+               return bitize((int)XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Count callback for "nlinkv1": 1 when di_version is
+ * XFS_DINODE_VERSION_1, otherwise 0.
+ */
+static int
+inode_core_nlinkv1_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dinode_core_t       *core = obj;
+
+       ASSERT(startoff == 0);
+       ASSERT(obj == iocur_top->data);
+       return core->di_version == XFS_DINODE_VERSION_1 ? 1 : 0;
+}
+
+/*
+ * Count callback for "nlinkv2": 1 when di_version is
+ * XFS_DINODE_VERSION_2, otherwise 0.
+ */
+static int
+inode_core_nlinkv2_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dinode_core_t       *core = obj;
+
+       ASSERT(startoff == 0);
+       ASSERT(obj == iocur_top->data);
+       return core->di_version == XFS_DINODE_VERSION_2 ? 1 : 0;
+}
+
+/*
+ * Count callback for "onlink": 1 when di_version is
+ * XFS_DINODE_VERSION_2, otherwise 0.
+ */
+static int
+inode_core_onlink_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dinode_core_t       *core = obj;
+
+       ASSERT(startoff == 0);
+       ASSERT(obj == iocur_top->data);
+       return core->di_version == XFS_DINODE_VERSION_2 ? 1 : 0;
+}
+
+/*
+ * Count callback for "projid": 1 when di_version is
+ * XFS_DINODE_VERSION_2, otherwise 0.
+ */
+static int
+inode_core_projid_count(
+       void                    *obj,
+       int                     startoff)
+{
+       xfs_dinode_core_t       *core = obj;
+
+       ASSERT(startoff == 0);
+       ASSERT(obj == iocur_top->data);
+       return core->di_version == XFS_DINODE_VERSION_2 ? 1 : 0;
+}
+
+/*
+ * Handler for the "inode" command.
+ *
+ * With an argument: parse it with strtoull (base 0, so decimal, octal
+ * and hex are accepted) and pass the result to set_cur_inode().  An
+ * argument that is empty or has trailing non-numeric characters is
+ * rejected with a message.
+ * Without an argument: print the current inode number, or "no current
+ * inode" when iocur_top->ino is NULLFSINO.
+ *
+ * Always returns 0.
+ */
+static int
+inode_f(
+       int             argc,
+       char            **argv)
+{
+       xfs_ino_t       ino;
+       char            *p;
+
+       if (argc > 1) {
+               ino = strtoull(argv[1], &p, 0);
+               /* p == argv[1]: nothing was converted (e.g. empty string) */
+               if (p == argv[1] || *p != '\0') {
+                       dbprintf("bad value for inode number %s\n", argv[1]);
+                       return 0;
+               }
+               set_cur_inode(ino);
+       } else if (iocur_top->ino == NULLFSINO)
+               dbprintf("no current inode\n");
+       else
+               /* cast so the argument matches the conversion exactly */
+               dbprintf("current inode number is %llu\n",
+                       (unsigned long long)iocur_top->ino);
+       return 0;
+}
+
+/* Pass the "inode" command descriptor (inode_cmd) to add_command(). */
+void
+inode_init(void)
+{
+       add_command(&inode_cmd);
+}
+
+/*
+ * Map the current inode (iocur_top->mode and iocur_top->ino) to a type:
+ *   directory    - TYP_DIR2 or TYP_DIR, depending on XFS_DIR_IS_V2(mp)
+ *   symlink      - TYP_SYMLINK
+ *   regular file - TYP_RTBITMAP, TYP_RTSUMMARY or TYP_DQBLK when the
+ *                  inode number matches sb_rbmino, sb_rsumino, or
+ *                  sb_uquotino/sb_pquotino; TYP_DATA otherwise
+ *   anything else - TYP_NONE
+ */
+typnm_t
+inode_next_type(void)
+{
+       int     ftype = iocur_top->mode & IFMT;
+
+       if (ftype == IFDIR)
+               return XFS_DIR_IS_V2(mp) ? TYP_DIR2 : TYP_DIR;
+       if (ftype == IFLNK)
+               return TYP_SYMLINK;
+       if (ftype != IFREG)
+               return TYP_NONE;
+
+       /* regular file: check the inode numbers recorded in the superblock */
+       if (iocur_top->ino == mp->m_sb.sb_rbmino)
+               return TYP_RTBITMAP;
+       if (iocur_top->ino == mp->m_sb.sb_rsumino)
+               return TYP_RTSUMMARY;
+       if (iocur_top->ino == mp->m_sb.sb_uquotino ||
+           iocur_top->ino == mp->m_sb.sb_pquotino)
+               return TYP_DQBLK;
+       return TYP_DATA;
+}
+
+/*
+ * Size callback for an inode: the on-disk inode size
+ * (mp->m_sb.sb_inodesize) expressed in bits.  All arguments are unused.
+ */
+int
+inode_size(
+       void    *obj,
+       int     startoff,
+       int     idx)
+{
+       int     nbits = bitize(mp->m_sb.sb_inodesize);
+
+       return nbits;
+}
+
+/*
+ * Count callback for "u.bmbt": 1 when di_format is
+ * XFS_DINODE_FMT_BTREE, otherwise 0.  obj must be the current buffer and
+ * startoff the offset of di_u.
+ */
+static int
+inode_u_bmbt_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       return INT_GET(ip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE ? 1 : 0;
+}
+
+/*
+ * Count callback for "u.bmx": di_nextents when di_format is
+ * XFS_DINODE_FMT_EXTENTS, otherwise 0.  obj must be the current buffer
+ * and startoff the offset of di_u.
+ */
+static int
+inode_u_bmx_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_EXTENTS)
+               return 0;
+       return INT_GET(ip->di_core.di_nextents, ARCH_CONVERT);
+}
+
+/*
+ * Count callback for "u.c": di_size (as int) when di_format is
+ * XFS_DINODE_FMT_LOCAL and the mode says regular file, otherwise 0.
+ * obj must be the current buffer and startoff the offset of di_u.
+ */
+static int
+inode_u_c_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+               return 0;
+       if ((INT_GET(ip->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFREG)
+               return 0;
+       return (int)INT_GET(ip->di_core.di_size, ARCH_CONVERT);
+}
+
+/*
+ * Count callback for "u.dev": 1 when di_format is XFS_DINODE_FMT_DEV,
+ * otherwise 0.  obj must be the current buffer and startoff the offset
+ * of di_u.
+ */
+static int
+inode_u_dev_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       return INT_GET(ip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_DEV ? 1 : 0;
+}
+
+/*
+ * Count callback for "u.muuid": 1 when di_format is
+ * XFS_DINODE_FMT_UUID, otherwise 0.  obj must be the current buffer and
+ * startoff the offset of di_u.
+ */
+static int
+inode_u_muuid_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       return INT_GET(ip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_UUID ? 1 : 0;
+}
+
+/*
+ * Count callback for "u.sfdir": 1 when di_format is
+ * XFS_DINODE_FMT_LOCAL, the mode says directory and XFS_DIR_IS_V1(mp)
+ * holds; otherwise 0.  obj must be the current buffer and startoff the
+ * offset of di_u.
+ */
+static int
+inode_u_sfdir_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+               return 0;
+       if ((INT_GET(ip->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFDIR)
+               return 0;
+       return XFS_DIR_IS_V1(mp) ? 1 : 0;
+}
+
+/*
+ * Count callback for "u.sfdir2": 1 when di_format is
+ * XFS_DINODE_FMT_LOCAL, the mode says directory and XFS_DIR_IS_V2(mp)
+ * holds; otherwise 0.  obj must be the current buffer and startoff the
+ * offset of di_u.
+ */
+static int
+inode_u_sfdir2_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+               return 0;
+       if ((INT_GET(ip->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFDIR)
+               return 0;
+       return XFS_DIR_IS_V2(mp) ? 1 : 0;
+}
+
+/*
+ * Size, in bits, of the inode's "u" area, selected by di_format:
+ *   DEV     - size of xfs_dev_t
+ *   LOCAL   - di_size bytes
+ *   EXTENTS - di_nextents extent records
+ *   BTREE   - XFS_DFORK_DSIZE_ARCH for the inode
+ *   UUID    - size of uuid_t
+ *   other   - 0
+ * startoff and idx must be 0.
+ */
+int
+inode_u_size(
+       void            *obj,
+       int             startoff,
+       int             idx)
+{
+       xfs_dinode_t    *ip = obj;
+       int             nbits;
+
+       ASSERT(startoff == 0);
+       ASSERT(idx == 0);
+       switch (INT_GET(ip->di_core.di_format, ARCH_CONVERT)) {
+       case XFS_DINODE_FMT_DEV:
+               nbits = bitsz(xfs_dev_t);
+               break;
+       case XFS_DINODE_FMT_LOCAL:
+               nbits = bitize((int)INT_GET(ip->di_core.di_size, ARCH_CONVERT));
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               nbits = (int)(INT_GET(ip->di_core.di_nextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               nbits = bitize((int)XFS_DFORK_DSIZE_ARCH(ip, mp, ARCH_CONVERT));
+               break;
+       case XFS_DINODE_FMT_UUID:
+               nbits = bitsz(uuid_t);
+               break;
+       default:
+               nbits = 0;
+               break;
+       }
+       return nbits;
+}
+
+/*
+ * Count callback for "u.symlink": di_size (as int) when di_format is
+ * XFS_DINODE_FMT_LOCAL and the mode says symbolic link, otherwise 0.
+ * obj must be the current buffer and startoff the offset of di_u.
+ */
+static int
+inode_u_symlink_count(
+       void            *obj,
+       int             startoff)
+{
+       xfs_dinode_t    *ip = obj;
+
+       ASSERT(bitoffs(startoff) == 0);
+       ASSERT(obj == iocur_top->data);
+       ASSERT((char *)&ip->di_u - (char *)ip == byteize(startoff));
+       if (INT_GET(ip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+               return 0;
+       if ((INT_GET(ip->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFLNK)
+               return 0;
+       return (int)INT_GET(ip->di_core.di_size, ARCH_CONVERT);
+}
+
+/*
+ * Make inode "ino" the current object.
+ *
+ * The inode number is split into AG number, AG block and offset within
+ * the block.  If any part is out of range for this filesystem, or the
+ * parts do not recombine to the same inode number, a message is printed
+ * and nothing else changes.  Otherwise cur_agno is updated, the block
+ * holding the inode is made current, the cursor is narrowed to the
+ * inode itself, and iocur_top->ino / ->mode (and ->dirino for a
+ * directory) are filled in before the ring is updated.
+ */
+void
+set_cur_inode(
+       xfs_ino_t       ino)
+{
+       xfs_agblock_t   agbno;
+       xfs_agino_t     agino;
+       xfs_agnumber_t  agno;
+       xfs_dinode_t    *dip;
+       int             offset;
+
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+       offset = XFS_AGINO_TO_OFFSET(mp, agino);
+       if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+           offset >= mp->m_sb.sb_inopblock ||
+           XFS_AGINO_TO_INO(mp, agno, agino) != ino) {
+               /* cast so the argument matches the conversion exactly */
+               dbprintf("bad inode number %llu\n", (unsigned long long)ino);
+               return;
+       }
+       cur_agno = agno;
+       /*
+        * First set_cur to the block with the inode
+        * then use off_cur to get the right part of the buffer.
+        */
+       ASSERT(typtab[TYP_INODE].typnm == TYP_INODE);
+
+       /* ignore ring update here, do it explicitly below */
+       set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
+               blkbb, DB_RING_IGN, NULL);
+       off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize);
+       dip = iocur_top->data;
+       iocur_top->ino = ino;
+       iocur_top->mode = INT_GET(dip->di_core.di_mode, ARCH_CONVERT);
+       if ((iocur_top->mode & IFMT) == IFDIR)
+               iocur_top->dirino = ino;
+
+       /* track updated info in ring */
+       ring_add();
+}
diff --git a/db/inode.h b/db/inode.h
new file mode 100644 (file)
index 0000000..3d0a22f
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Field tables defined in inode.c. */
+extern const struct field      inode_a_flds[];
+extern const struct field      inode_core_flds[];
+extern const struct field      inode_flds[];
+extern const struct field      inode_hfld[];
+extern const struct field      inode_u_flds[];
+extern const struct field      timestamp_flds[];
+
+/* Prints inode format values as "<n> (<name>)"; always returns 1. */
+extern int     fp_dinode_fmt(void *obj, int bit, int count, char *fmtstr,
+                             int size, int arg, int base, int array);
+/* Size in bits of the inode's "a" area, chosen by di_aformat. */
+extern int     inode_a_size(void *obj, int startoff, int idx);
+/* Registers the "inode" command. */
+extern void    inode_init(void);
+/* Type implied by the current inode's mode and inode number. */
+extern typnm_t inode_next_type(void);
+/* On-disk inode size in bits. */
+extern int     inode_size(void *obj, int startoff, int idx);
+/* Size in bits of the inode's "u" area, chosen by di_format. */
+extern int     inode_u_size(void *obj, int startoff, int idx);
+/* Validates ino and makes that inode the current object. */
+extern void    set_cur_inode(xfs_ino_t ino);
diff --git a/db/input.c b/db/input.c
new file mode 100644 (file)
index 0000000..1394f10
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <signal.h>
+#include <string.h>
+#include "command.h"
+#include "data.h"
+#include "input.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+#include "init.h"
+
+/*
+ * Stack of input streams.  pushfile() appends a stream and makes it
+ * current; popfile() drops the current one and falls back to the stream
+ * below it.  curinput is the top of the stack, or NULL once the stack has
+ * been emptied.
+ */
+int    inputstacksize;
+FILE   **inputstack;
+FILE   *curinput;
+
+static void    popfile(void);
+static int     source_f(int argc, char **argv);
+
+/*
+ * "source" command, registered by input_init().
+ * NOTE(review): the "1, 1" pair looks like the minimum/maximum argument
+ * count -- confirm against cmdinfo_t in command.h.
+ */
+static const cmdinfo_t source_cmd =
+       { "source", NULL, source_f, 1, 1, 0, "source-file",
+         "get commands from source-file", NULL };
+
+/*
+ * Homegrown strtok() replacement that understands double-quoted strings
+ * and backslash escapes.
+ *
+ * Pass the line on the first call and NULL on the following calls.  Tokens
+ * are separated by blanks or tabs that are neither quoted nor escaped; the
+ * separator is overwritten with '\0' in the caller's buffer.  Quotes and
+ * backslashes are left in the returned token.  Returns NULL when no token
+ * remains.
+ */
+
+static char *
+tokenize(
+       char        *inp)
+{
+       static char *resume = NULL;     /* separator overwritten last time */
+       char        *tok;
+       char        *p;
+       int         quoted = 0;
+       int         escaped = 0;
+
+       if (inp != NULL) {
+               tok = inp;
+       } else if (resume == NULL || *resume != '\0') {
+               /* the previous token ran to the end of the line: we're done */
+               return NULL;
+       } else {
+               tok = resume + 1;
+       }
+       resume = NULL;
+
+       /* eat leading whitespace */
+       while (*tok == ' ' || *tok == '\t')
+               tok++;
+
+       for (p = tok; *p != '\0'; p++) {
+               if (escaped) {
+                       /* the character after a backslash is taken as is */
+                       escaped = 0;
+               } else if (*p == '\\') {
+                       escaped = 1;
+               } else if (*p == '\"') {
+                       quoted = !quoted;
+               } else if (!quoted && (*p == ' ' || *p == '\t')) {
+                       /* end the token here and remember where to resume */
+                       *p = '\0';
+                       resume = p;
+                       break;
+               }
+       }
+
+       return (p == tok) ? NULL : tok;
+}
+
+/*
+ * Split "input" in place into a NULL-terminated vector of tokens.
+ *
+ * The returned vector is allocated here (release it with doneline()); the
+ * tokens themselves point into "input".  The number of tokens is stored
+ * in *count.
+ */
+char **
+breakline(
+       char    *input,
+       int     *count)
+{
+       char    **vec;
+       char    *tok;
+       int     ntok = 0;
+
+       /* start with just the terminating NULL slot */
+       vec = xcalloc(sizeof(char *), 1);
+       for (tok = tokenize(input); tok != NULL; tok = tokenize(NULL)) {
+               ntok++;
+               vec = xrealloc(vec, sizeof(*vec) * (ntok + 1));
+               vec[ntok - 1] = tok;
+               vec[ntok] = NULL;
+       }
+       *count = ntok;
+       return vec;
+}
+
+/*
+ * Release a line returned by fetchline() together with the token vector
+ * that breakline() built for it.
+ */
+void
+doneline(
+       char    *input,
+       char    **vec)
+{
+       xfree(input);
+       xfree(vec);
+}
+
+/*
+ * Read one logical command line from the current input stream.
+ *
+ * A prompt is printed only while the bottom stream of the input stack is
+ * being read (inputstacksize == 1).  A backslash immediately before the
+ * newline continues the command on the next physical line; the pair is
+ * replaced by a single space.  When a stream ends or fails it is popped
+ * and reading resumes from the stream below it, dropping any partially
+ * collected line.  An interrupt noticed through seenint() also drops a
+ * pending continuation and prompts again.
+ *
+ * Returns an allocated string without the trailing newline (release it
+ * with doneline()), or NULL once every input stream is exhausted.
+ */
+char *
+fetchline(void)
+{
+       char    buf[1024];
+       int     iscont;
+       size_t  len;
+       size_t  rlen;
+       char    *rval;
+
+       rval = NULL;
+       for (rlen = iscont = 0; ; ) {
+               if (inputstacksize == 1) {
+                       if (iscont)
+                               dbprintf("... ");
+                       else
+                               dbprintf("%s: ", progname);
+                       /*
+                        * Push the prompt out before blocking on input.
+                        * This used to flush stdin, which is undefined for
+                        * an input stream and never flushed the prompt.
+                        */
+                       fflush(stdout);
+               }
+               if (seenint() ||
+                   (!fgets(buf, sizeof(buf), curinput) &&
+                    ferror(curinput) && seenint())) {
+                       /* interrupted: forget any half-built continuation */
+                       clearint();
+                       dbprintf("^C\n");
+                       clearerr(curinput);
+                       if (iscont) {
+                               iscont = 0;
+                               rlen = 0;
+                               if (rval) {
+                                       xfree(rval);
+                                       rval = NULL;
+                               }
+                       }
+                       continue;
+               }
+               if (ferror(curinput) || feof(curinput) ||
+                   (len = strlen(buf)) == 0) {
+                       /* this stream is finished; fall back to the previous one */
+                       popfile();
+                       if (curinput == NULL) {
+                               dbprintf("\n");
+                               return NULL;
+                       }
+                       iscont = 0;
+                       rlen = 0;
+                       if (rval) {
+                               xfree(rval);
+                               rval = NULL;
+                       }
+                       continue;
+               }
+               if (inputstacksize == 1)
+                       logprintf("%s", buf);
+               /* append this chunk to what has been collected so far */
+               rval = xrealloc(rval, rlen + len + 1);
+               if (rlen == 0)
+                       rval[0] = '\0';
+               rlen += len;
+               strcat(rval, buf);
+               if (buf[len - 1] == '\n') {
+                       if (len > 1 && buf[len - 2] == '\\') {
+                               /* backslash-newline becomes one space; keep reading */
+                               rval[rlen - 2] = ' ';
+                               rval[rlen - 1] = '\0';
+                               rlen--;
+                               iscont = 1;
+                       } else {
+                               /* strip the newline: the line is complete */
+                               rval[rlen - 1] = '\0';
+                               rlen--;
+                               break;
+                       }
+               }
+       }
+       return rval;
+}
+
+/* Register the "source" command with the command table. */
+void
+input_init(void)
+{
+       add_command(&source_cmd);
+}
+
+/*
+ * Drop the current input stream and make the one below it current.
+ *
+ * Every stream except stdin is closed.  When the last stream is removed
+ * the stack array is released and curinput becomes NULL, which is how
+ * fetchline() recognises the end of all input.
+ */
+static void
+popfile(void)
+{
+       if (inputstacksize == 0) {
+               curinput = NULL;
+               return;
+       }
+       if (curinput != stdin)
+               fclose(curinput);
+
+       inputstacksize--;
+       if (inputstacksize) {
+               inputstack =
+                       xrealloc(inputstack, inputstacksize * sizeof(*inputstack));
+               curinput = inputstack[inputstacksize - 1];
+       } else {
+               /* the array came from xrealloc(), so release it with xfree() */
+               xfree(inputstack);
+               curinput = NULL;
+               inputstack = NULL;
+       }
+}
+
+/*
+ * Push "file" onto the input stack and make it the stream that
+ * fetchline() reads from.
+ */
+void
+pushfile(
+       FILE    *file)
+{
+       int     slot = inputstacksize;
+
+       /* grow the stack by one entry and store the stream in the new slot */
+       inputstack = xrealloc(inputstack, (slot + 1) * sizeof(*inputstack));
+       inputstack[slot] = file;
+       inputstacksize = slot + 1;
+       curinput = file;
+}
+
+/*
+ * "source" command: open argv[1] and read subsequent commands from it.
+ * Always returns 0; a file that cannot be opened is only reported.
+ */
+/* ARGSUSED */
+static int
+source_f(
+       int     argc,
+       char    **argv)
+{
+       FILE    *f;
+
+       f = fopen(argv[1], "r");
+       if (f == NULL) {
+               /* name the file that failed, not the command (argv[0]) */
+               dbprintf("can't open %s\n", argv[1]);
+       } else {
+               pushfile(f);
+       }
+       return 0;
+}
diff --git a/db/input.h b/db/input.h
new file mode 100644 (file)
index 0000000..406997c
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* split "input" in place into a NULL-terminated token vector */
+extern char    **breakline(char *input, int *count);
+/* free a fetched line and the vector breakline() built for it */
+extern void    doneline(char *input, char **vec);
+/* read the next logical command line; NULL once all input is exhausted */
+extern char    *fetchline(void);
+/* register the "source" command */
+extern void    input_init(void);
+/* make "file" the current input stream */
+extern void    pushfile(FILE *file);
diff --git a/db/io.c b/db/io.c
new file mode 100644 (file)
index 0000000..849103d
--- /dev/null
+++ b/db/io.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <errno.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "io.h"
+#include "output.h"
+#include "mount.h"
+#include "malloc.h"
+
+/* command handlers and their help printers, defined later in this file */
+static int     pop_f(int argc, char **argv);
+static void     pop_help(void);
+static int     push_f(int argc, char **argv);
+static void     push_help(void);
+static int     stack_f(int argc, char **argv);
+static void     stack_help(void);
+static int      forward_f(int argc, char **argv);
+static void     forward_help(void);
+static int      back_f(int argc, char **argv);
+static void     back_help(void);
+static int      ring_f(int argc, char **argv);
+static void     ring_help(void);
+
+/*
+ * Command table entries registered by io_init().
+ * NOTE(review): the layout appears to be name, alias, handler, min args,
+ * max args, canpush, argument synopsis, one-line help, help function --
+ * confirm against cmdinfo_t in command.h.
+ */
+static const cmdinfo_t pop_cmd =
+       { "pop", NULL, pop_f, 0, 0, 0, NULL,
+         "pop location from the stack", pop_help };
+static const cmdinfo_t push_cmd =
+       { "push", NULL, push_f, 0, 2, 0, "[command]",
+         "push location to the stack", push_help };
+static const cmdinfo_t stack_cmd =
+       { "stack", NULL, stack_f, 0, 0, 0, NULL,
+         "view the location stack", stack_help };
+static const cmdinfo_t  forward_cmd = 
+       { "forward", "f", forward_f, 0, 0, 0, NULL,
+         "move forward to next entry in the position ring", forward_help };
+static const cmdinfo_t  back_cmd = 
+       { "back", "b", back_f, 0, 0, 0, NULL,
+         "move to the previous location in the position ring", back_help };
+static const cmdinfo_t  ring_cmd = 
+       { "ring", NULL, ring_f, 0, 1, 0, NULL,
+         "show position ring or move to a specific entry", ring_help };
+
+/*
+ * I/O cursor stack.  iocur_base is the array (grown by push_cur()),
+ * iocur_len its allocated length, iocur_sp the index of the top element
+ * (-1 until the first push) and iocur_top a pointer to that element.
+ */
+iocur_t        *iocur_base;
+iocur_t        *iocur_top;
+int    iocur_sp = -1;
+int    iocur_len;
+
+/*
+ * Position ring: a circular history of visited locations.  ring_head is
+ * the newest slot, ring_tail the oldest and ring_current the slot being
+ * viewed; all three are -1 until ring_add() stores the first entry.
+ */
+#define RING_ENTRIES 20
+static iocur_t iocur_ring[RING_ENTRIES];
+static int     ring_head = -1;
+static int     ring_tail = -1;
+static int     ring_current = -1;
+
+/* Register the stack (pop/push/stack) and ring (forward/back/ring) commands. */
+void
+io_init(void)
+{
+       add_command(&pop_cmd);
+       add_command(&push_cmd);
+       add_command(&stack_cmd);
+       add_command(&forward_cmd);
+       add_command(&back_cmd);
+       add_command(&ring_cmd);
+}
+
+/*
+ * Narrow the current cursor to "len" bytes starting "off" bytes into its
+ * buffer.  The request is refused with a message when there is no current
+ * cursor or the range runs past the end of the buffer.
+ */
+void
+off_cur(
+       int     off,
+       int     len)
+{
+       iocur_t *cur = iocur_top;
+
+       if (cur == NULL || off + len > BBTOB(cur->blen)) {
+               dbprintf("can't set block offset to %d\n", off);
+               return;
+       }
+
+       cur->boff = off;
+       cur->off = ((xfs_off_t)cur->bb << BBSHIFT) + off;
+       cur->len = len;
+       cur->data = (void *)((char *)cur->buf + off);
+}
+
+/*
+ * Discard the top entry of the I/O cursor stack, freeing its buffer, and
+ * make the entry below it (with its type) current.
+ *
+ * Popping the last entry leaves iocur_sp at 0 with iocur_top pointing at
+ * the just-released slot, so the buffer pointer is cleared after freeing
+ * it.  Without that, a second pop would free the same buffer again.
+ */
+void
+pop_cur(void)
+{
+       if (iocur_sp < 0) {
+               dbprintf("can't pop anything from I/O stack\n");
+               return;
+       }
+       if (iocur_top->buf) {
+               xfree(iocur_top->buf);
+               /* the slot may stay current (see above): don't leave it dangling */
+               iocur_top->buf = NULL;
+       }
+       if (--iocur_sp >= 0) {
+               iocur_top = iocur_base + iocur_sp;
+               cur_typ = iocur_top->typ;
+       } else {
+               iocur_top = iocur_base;
+               iocur_sp = 0;
+       }
+}
+
+/* "pop" command: discard the top of the I/O cursor stack.  Always returns 0. */
+/*ARGSUSED*/
+static int
+pop_f(
+       int     argc,
+       char    **argv)
+{
+       pop_cur();
+       return 0;
+}
+
+/* Print the long help text for the "pop" command. */
+static void
+pop_help(void)
+{
+       static const char text[] =
+"\n"
+" Changes the address and data type to the first entry on the stack.\n"
+"\n";
+
+       dbprintf("%s", text);
+}
+
+/*
+ * Print one I/O cursor: the caller's tag line, then byte offset and
+ * length, buffer block and size, the block map (only when one is in use)
+ * and finally the inode, directory inode and type name.
+ */
+void
+print_iocur(
+       char    *tag,
+       iocur_t *ioc)
+{
+       xfs_dfsbno_t    fsbno = (xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ioc->bb);
+       const char      *plural;
+       const char      *tname;
+       int             n;
+
+       if (ioc->blen == 1)
+               plural = "";
+       else
+               plural = "s";
+
+       if (ioc->typ == NULL)
+               tname = "none";
+       else
+               tname = ioc->typ->name;
+
+       dbprintf("%s\n", tag);
+       dbprintf("\tbyte offset %lld, length %d\n", ioc->off, ioc->len);
+       dbprintf("\tbuffer block %lld (fsbno %lld), %d bb%s\n", ioc->bb,
+               fsbno, ioc->blen, plural);
+       if (ioc->use_bbmap) {
+               dbprintf("\tblock map");
+               for (n = 0; n < ioc->blen; n++)
+                       dbprintf(" %d:%lld", n, ioc->bbmap.b[n]);
+               dbprintf("\n");
+       }
+       dbprintf("\tinode %lld, dir inode %lld, type %s\n", ioc->ino,
+               ioc->dirino, tname);
+}
+
+/*
+ * List the position ring from the newest entry (ring_head) back to the
+ * oldest (ring_tail), marking the current entry with an asterisk.
+ */
+void
+print_ring(void)
+{
+       int     slot;
+       iocur_t *ent;
+
+       if (ring_current == -1) {
+               dbprintf("no entries in location ring.\n");
+               return;
+       }
+
+       dbprintf("      type    bblock  bblen    fsbno     inode\n");
+
+       /* step backwards one slot at a time, wrapping around the array */
+       for (slot = ring_head; ;
+            slot = (slot + (RING_ENTRIES - 1)) % RING_ENTRIES) {
+               ent = &iocur_ring[slot];
+
+               printf("%c%2d: ", slot == ring_current ? '*' : ' ', slot);
+               dbprintf("%-7.7s %8lld %5d %8lld %9lld\n",
+                        ent->typ == NULL ? "none" : ent->typ->name,
+                        ent->bb,
+                        ent->blen,
+                        (xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ent->bb),
+                        ent->ino
+                       );
+
+               if (slot == ring_tail)
+                       break;
+       }
+}
+
+
+/*
+ * Push a zeroed entry onto the I/O cursor stack, growing the array when
+ * it is full.  The new entry inherits the inode, directory inode and mode
+ * of the entry below it (or NULLFSINO/NULLFSINO/0 when it is the first),
+ * and the current type is reset to NULL.
+ */
+void
+push_cur(void)
+{
+       if (iocur_sp + 1 >= iocur_len) {
+               iocur_base = xrealloc(iocur_base,
+                       sizeof(*iocur_base) * (iocur_len + 1));
+               iocur_len++;
+       }
+
+       iocur_sp++;
+       iocur_top = iocur_base + iocur_sp;
+       memset(iocur_top, 0, sizeof(*iocur_base));
+
+       if (iocur_sp > 0) {
+               iocur_top->ino = iocur_top[-1].ino;
+               iocur_top->dirino = iocur_top[-1].dirino;
+               iocur_top->mode = iocur_top[-1].mode;
+       } else {
+               iocur_top->ino = NULLFSINO;
+               iocur_top->dirino = NULLFSINO;
+               iocur_top->mode = 0;
+       }
+       cur_typ = NULL;
+}
+
+/*
+ * "push" command: duplicate the current location on top of the stack and
+ * optionally run a command from there.
+ *
+ * A given command is looked up first and must allow the push form; the
+ * stack is left untouched when it does not.  Always returns 0.
+ */
+static int
+push_f(
+       int             argc,
+       char            **argv)
+{
+       const cmdinfo_t *ct;
+       iocur_t         *below;
+
+       /* check we can execute command */
+       if (argc > 1) {
+               ct = find_command(argv[1]);
+               if (ct == NULL) {
+                       dbprintf("no such command %s\n", argv[1]);
+                       return 0;
+               }
+               if (!ct->canpush) {
+                       dbprintf("no push form allowed for %s\n", argv[1]);
+                       return 0;
+               }
+       }
+
+       /* save current state: re-read the old location into the new top */
+       push_cur();
+       below = &iocur_top[-1];
+       if (below->typ && below->typ->typnm == TYP_INODE) {
+               set_cur_inode(below->ino);
+       } else {
+               set_cur(below->typ, below->bb, below->blen, DB_RING_IGN,
+                       below->use_bbmap ? &below->bbmap : NULL);
+       }
+
+       /* run requested command */
+       if (argc > 1)
+               (void)command(argc - 1, argv + 1);
+       return 0;
+}
+
+/* Print the long help text for the "push" command. */
+static void
+push_help(void)
+{
+       static const char text[] =
+"\n"
+" Allows you to push the current address and data type on the stack for\n"
+" later return.  'push' also accepts an additional command to execute after\n"
+" storing the current address (ex: 'push a rootino' from the superblock).\n"
+"\n";
+
+       dbprintf("%s", text);
+}
+
+/*
+ * "forward" command: step to the next (newer) entry of the position ring
+ * and load it without recording a new ring entry.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+forward_f(
+       int             argc,
+       char            **argv)
+{
+       iocur_t         *ent;
+
+       if (ring_current == -1) {
+               dbprintf("ring is empty\n");
+               return 0;
+       }
+       if (ring_current == ring_head) {
+               dbprintf("no further entries\n");
+               return 0;
+       }
+
+       ring_current = (ring_current + 1) % RING_ENTRIES;
+       ent = &iocur_ring[ring_current];
+
+       set_cur(ent->typ, ent->bb, ent->blen, DB_RING_IGN,
+               ent->use_bbmap ? &ent->bbmap : NULL);
+
+       return 0;
+}
+
+/* Print the long help text for the "forward" command. */
+static void
+forward_help(void)
+{
+       static const char text[] =
+"\n"
+" The 'forward' ('f') command moves to the next location in the position\n"
+" ring, updating the current position and data type.  If the current location\n"
+" is the top entry in the ring, then the 'forward' command will have\n"
+" no effect.\n"
+"\n";
+
+       dbprintf("%s", text);
+}
+
+/*
+ * "back" command: step to the previous (older) entry of the position ring
+ * and load it without recording a new ring entry.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+back_f(
+       int             argc,
+       char            **argv)
+{
+       iocur_t         *ent;
+
+       if (ring_current == -1) {
+               dbprintf("ring is empty\n");
+               return 0;
+       }
+       if (ring_current == ring_tail) {
+               dbprintf("no previous entries\n");
+               return 0;
+       }
+
+       /* adding RING_ENTRIES-1 steps one slot back without going negative */
+       ring_current = (ring_current + (RING_ENTRIES - 1)) % RING_ENTRIES;
+       ent = &iocur_ring[ring_current];
+
+       set_cur(ent->typ, ent->bb, ent->blen, DB_RING_IGN,
+               ent->use_bbmap ? &ent->bbmap : NULL);
+
+       return 0;
+}
+
+/* Print the long help text for the "back" command. */
+static void
+back_help(void)
+{
+       static const char text[] =
+"\n"
+" The 'back' ('b') command moves to the previous location in the position\n"
+" ring, updating the current position and data type.  If the current location\n"
+" is the last entry in the ring, then the 'back' command will have no effect.\n"
+"\n";
+
+       dbprintf("%s", text);
+}
+
+/*
+ * "ring" command: with no argument list the position ring, otherwise jump
+ * to the entry whose number is given in argv[1].
+ *
+ * The number must name a slot that currently holds an entry, i.e. one
+ * lying between ring_tail and ring_head; anything else is reported and
+ * ignored so that ring_current never points at an unused or out-of-range
+ * slot.  Always returns 0.
+ */
+static int
+ring_f(
+       int             argc,
+       char            **argv)
+{
+       int index;
+       int used;
+
+       if (argc == 1) {
+               print_ring();
+               return 0;
+       }
+
+       if (ring_current == -1) {
+               dbprintf("ring is empty\n");
+               return 0;
+       }
+
+       /* argv[0] is the command name; the entry number is argv[1] */
+       index = (int)strtoul(argv[1], NULL, 0);
+       if (index < 0 || index >= RING_ENTRIES) {
+               dbprintf("invalid entry: %d\n", index);
+               return 0;
+       }
+
+       /* distance from the oldest entry must not exceed that of the newest */
+       used = (ring_head - ring_tail + RING_ENTRIES) % RING_ENTRIES;
+       if ((index - ring_tail + RING_ENTRIES) % RING_ENTRIES > used) {
+               dbprintf("invalid entry: %d\n", index);
+               return 0;
+       }
+
+       ring_current = index;
+
+       set_cur(iocur_ring[index].typ,
+               iocur_ring[index].bb,
+               iocur_ring[index].blen,
+               DB_RING_IGN,
+               iocur_ring[index].use_bbmap ? &iocur_ring[index].bbmap : NULL);
+
+       return 0;
+}
+
+/*
+ * Print the long help text for the "ring" command.  The text used to pair
+ * 'forward'/'back' with "previous or next ... respectively", which is the
+ * reverse of what the commands do; the order is corrected here.
+ */
+static void
+ring_help(void)
+{
+       dbprintf(
+"\n"
+" The position ring automatically keeps track of each disk location and\n"
+" structure type for each change of position you make during your xfs_db\n"
+" session.  The last %d most recent entries are kept in the ring.\n"
+"\n"
+" To display the current list of ring entries type 'ring' by itself on\n"
+" the command line.  The entry highlighted by an asterisk ('*') is the\n"
+" current entry.\n"
+"\n"
+" To move to another entry in the ring type 'ring <num>' where <num> is\n"
+" your desired entry from the ring position list.\n"
+"\n"
+" You may also use the 'forward' ('f') or 'back' ('b') commands to move\n"
+" to the next or previous entry in the ring, respectively.\n"
+"\n"
+" Note: Unlike the 'stack', 'push' and 'pop' commands, the ring tracks your\n"
+" location implicitly.  Use the 'push' and 'pop' commands if you wish to\n"
+" store a specific location explicitly for later return.\n"
+"\n",
+               RING_ENTRIES);
+}
+
+
+/*
+ * Record the current I/O cursor (*iocur_top) in the position ring.
+ *
+ * The first call fills slot 0.  After that, when the user is at the
+ * newest entry the ring grows by one slot, pushing the tail along once
+ * the ring is full; otherwise the slot following the current one is
+ * overwritten.
+ */
+void
+ring_add(void)
+{
+       if (ring_head == -1) {
+               /* only get here right after startup */
+               ring_head = 0;
+               ring_tail = 0;
+               ring_current = 0;
+               iocur_ring[0] = *iocur_top;
+               return;
+       }
+
+       if (ring_current != ring_head) {
+               ring_current = (ring_current + 1) % RING_ENTRIES;
+               iocur_ring[ring_current] = *iocur_top;
+               return;
+       }
+
+       ring_head = (ring_head + 1) % RING_ENTRIES;
+       iocur_ring[ring_head] = *iocur_top;
+       if (ring_head == ring_tail)
+               ring_tail = (ring_tail + 1) % RING_ENTRIES;
+       ring_current = ring_head;
+}
+
+
+/*
+ * Write "count" basic blocks from "bufp" to the data device.
+ *
+ * Without a map the blocks are written in one piece starting at "bbno";
+ * with a map each block is written separately at bbmap->b[j].
+ *
+ * Returns 0 on success, an errno value when a seek or write fails, -1 on
+ * a short write, and EINVAL when "count" gives the loop nothing to do.
+ */
+int
+write_bbs(
+       __int64_t       bbno,
+       int             count,
+       void            *bufp,
+       bbmap_t         *bbmap)
+{
+       int             want;
+       int             done;
+       int             j;
+       int             step = bbmap ? 1 : count;
+       int             rval = EINVAL;  /* initialize for zero `count' case */
+
+       for (j = 0; j < count; j += step) {
+               if (bbmap)
+                       bbno = bbmap->b[j];
+               if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) {
+                       rval = errno;
+                       dbprintf("can't seek in filesystem at bb %lld\n", bbno);
+                       return rval;
+               }
+               want = BBTOB(step);
+               done = (int)write(xfsargs.dfd, (char *)bufp + BBTOB(j), want);
+               if (done < 0)
+                       rval = errno;
+               else
+                       rval = (done < want) ? -1 : 0;
+               if (rval != 0)
+                       break;
+       }
+       return rval;
+}
+
+/*
+ * Read "count" basic blocks from the data device.
+ *
+ * When *bufp is NULL a buffer is allocated here and handed back through
+ * *bufp; on failure that buffer is freed and *bufp stays NULL.  A buffer
+ * supplied by the caller is filled in place and never freed.  Without a
+ * map the blocks are read in one piece starting at "bbno"; with a map
+ * each block is read separately from bbmap->b[j].
+ *
+ * Returns 0 on success, an errno value when a seek or read fails, -1 on a
+ * short read, and EINVAL when "count" is zero.
+ */
+int
+read_bbs(
+       __int64_t       bbno,
+       int             count,
+       void            **bufp,
+       bbmap_t         *bbmap)
+{
+       void            *buf;
+       int             want;
+       int             got;
+       int             j;
+       int             ours;           /* set when we allocated the buffer */
+       int             rval = EINVAL;
+
+       if (!count)
+               return EINVAL;
+
+       ours = (*bufp == NULL);
+       if (ours)
+               buf = xmalloc(BBTOB(count));
+       else
+               buf = *bufp;
+
+       for (j = 0; j < count; j += bbmap ? 1 : count) {
+               if (bbmap)
+                       bbno = bbmap->b[j];
+               if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) {
+                       rval = errno;
+                       dbprintf("can't seek in filesystem at bb %lld\n", bbno);
+               } else {
+                       want = BBTOB(bbmap ? 1 : count);
+                       got = (int)read(xfsargs.dfd, (char *)buf + BBTOB(j),
+                                       want);
+                       if (got < 0) {
+                               rval = errno;
+                       } else if (got < want) {
+                               rval = -1;
+                       } else {
+                               /* this piece is in; go on to the next one */
+                               rval = 0;
+                               continue;
+                       }
+               }
+
+               /* failure: give back our own allocation and stop */
+               if (ours)
+                       xfree(buf);
+               buf = NULL;
+               break;
+       }
+
+       if (ours)
+               *bufp = buf;
+       return rval;
+}
+
+/*
+ * Write the buffer of the current I/O cursor back to disk and then read
+ * it in again, reporting (but not otherwise acting on) any failure of
+ * either step.
+ */
+void
+write_cur(void)
+{
+       bbmap_t *map;
+       int     err;
+
+       if (iocur_sp < 0) {
+               dbprintf("nothing to write\n");
+               return;
+       }
+
+       map = iocur_top->use_bbmap ? &iocur_top->bbmap : NULL;
+
+       err = write_bbs(iocur_top->bb, iocur_top->blen, iocur_top->buf, map);
+       if (err == -1) {
+               dbprintf("incomplete write, block: %lld\n",
+                        (iocur_base + iocur_sp)->bb);
+       } else if (err != 0) {
+               dbprintf("write error: %s\n", strerror(err));
+       }
+
+       /* re-read buffer from disk */
+       err = read_bbs(iocur_top->bb, iocur_top->blen, &iocur_top->buf, map);
+       if (err == -1) {
+               dbprintf("incomplete read, block: %lld\n",
+                        (iocur_base + iocur_sp)->bb);
+       } else if (err != 0) {
+               dbprintf("read error: %s\n", strerror(err));
+       }
+}
+
+/*
+ * Replace the top of the I/O cursor stack with "c" basic blocks read from
+ * disk address "d" (or from the blocks listed in "bbmap"), typed as "t".
+ *
+ * The inode, directory inode and mode of the old top entry are carried
+ * over.  If the read fails the function returns early, leaving the freshly
+ * pushed entry in place.  A non-zero "ring_flag" also records the new
+ * location in the position ring.
+ */
+void
+set_cur(
+       const typ_t     *t,
+       __int64_t       d, 
+       int             c,
+       int             ring_flag,
+       bbmap_t         *bbmap)
+{
+       xfs_ino_t       keep_dirino;
+       xfs_ino_t       keep_ino;
+       __uint16_t      keep_mode;
+
+       if (iocur_sp < 0) {
+               dbprintf("set_cur no stack element to set\n");
+               return;
+       }
+
+#ifdef DEBUG
+       if (bbmap)
+               printf("xfs_db got a bbmap for %lld\n", d);
+#endif
+       /* remember what has to survive the pop/push below */
+       keep_ino = iocur_top->ino;
+       keep_dirino = iocur_top->dirino;
+       keep_mode = iocur_top->mode;
+
+       /* throw the old entry (and its buffer) away, start from a clean one */
+       pop_cur();
+       push_cur();
+       if (read_bbs(d, c, &iocur_top->buf, bbmap) != 0)
+               return;
+
+       iocur_top->bb = d;
+       iocur_top->blen = c;
+       iocur_top->boff = 0;
+       iocur_top->data = iocur_top->buf;
+       iocur_top->len = BBTOB(c);
+       iocur_top->off = d << BBSHIFT;
+       iocur_top->typ = cur_typ = t;
+       iocur_top->ino = keep_ino;
+       iocur_top->dirino = keep_dirino;
+       iocur_top->mode = keep_mode;
+
+       iocur_top->use_bbmap = (bbmap != NULL);
+       if (iocur_top->use_bbmap)
+               iocur_top->bbmap = *bbmap;
+
+       /* store location in ring */
+       if (ring_flag != 0)
+               ring_add();
+}
+
+/* Print the long help text for the "stack" command. */
+static void
+stack_help(void)
+{
+       static const char text[] =
+"\n"
+" The stack is used to explicitly store your location and data type\n"
+" for later return.  The 'push' operation stores the current address\n"
+" and type on the stack, the 'pop' operation returns you to the\n"
+" position and datatype of the top entry on the stack.\n"
+"\n"
+" The 'stack' allows explicit location saves, see 'ring' for implicit\n"
+" position tracking.\n"
+"\n";
+
+       dbprintf("%s", text);
+}
+
+/*
+ * "stack" command: print every entry of the I/O cursor stack from the top
+ * down, each tagged with its index.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+stack_f(
+       int     argc,
+       char    **argv)
+{
+       int     i;
+       char    tagbuf[16];     /* room for any int plus ": " and the NUL */
+
+       for (i = iocur_sp; i >= 0; i--) {
+               /* bounded: the old 8-byte sprintf overflowed at 6-digit indexes */
+               snprintf(tagbuf, sizeof(tagbuf), "%d: ", i);
+               print_iocur(tagbuf, &iocur_base[i]);
+       }
+       return 0;
+}
diff --git a/db/io.h b/db/io.h
new file mode 100644 (file)
index 0000000..85ee299
--- /dev/null
+++ b/db/io.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct typ;
+
+/* one disk address per basic block, used when a buffer is not contiguous */
+#define        BBMAP_SIZE              (XFS_MAX_BLOCKSIZE / BBSIZE)
+typedef struct bbmap {
+       __int64_t               b[BBMAP_SIZE];
+} bbmap_t;
+
+/* one I/O cursor: a buffer read from disk plus the view into it */
+typedef struct iocur {
+       __int64_t               bb;     /* BB number in filesystem of buf */
+       int                     blen;   /* length of "buf", bb's */
+       int                     boff;   /* data - buf */
+       void                    *buf;   /* base address of buffer */
+       void                    *data;  /* current interesting data */
+       xfs_ino_t               dirino; /* current directory inode number */
+       xfs_ino_t               ino;    /* current inode number */
+       int                     len;    /* length of "data", bytes */
+       __uint16_t              mode;   /* current inode's mode */
+       xfs_off_t               off;    /* fs offset of "data" in bytes */
+       const struct typ        *typ;   /* type of "data" */
+       int                     use_bbmap; /* set if bbmap is valid */
+       bbmap_t                 bbmap;  /* map daddr if fragmented */
+} iocur_t;
+
+/* values for the ring argument of set_cur() */
+#define DB_RING_ADD 1                   /* add to ring on set_cur */
+#define DB_RING_IGN 0                   /* do not add to ring on set_cur */
+
+extern iocur_t *iocur_base;            /* base of stack */
+extern iocur_t *iocur_top;             /* top element of stack */
+extern int     iocur_sp;               /* current top of stack */
+extern int     iocur_len;              /* length of stack array */
+
+extern void    io_init(void);
+extern void    off_cur(int off, int len);
+extern void    pop_cur(void);
+extern void    print_iocur(char *tag, iocur_t *ioc);
+extern void    push_cur(void);
+/* both return 0, an errno value, or -1 for a short transfer */
+extern int     read_bbs(__int64_t daddr, int count, void **bufp,
+                        bbmap_t *bbmap);
+extern int     write_bbs(__int64_t daddr, int count, void *bufp,
+                         bbmap_t *bbmap);
+extern void     write_cur(void);
+/*
+ * NOTE(review): the fourth parameter is called ring_flag in the definition
+ * in io.c; the name used here is also the name of the function declared
+ * just below.
+ */
+extern void    set_cur(const struct typ *t, __int64_t d, int c, int ring_add,
+                       bbmap_t *bbmap);
+extern void     ring_add(void);
diff --git a/db/main.c b/db/main.c
new file mode 100644 (file)
index 0000000..e000461
--- /dev/null
+++ b/db/main.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "init.h"
+#include "input.h"
+
+/*
+ * Program entry point.  After pushfile(stdin) and init(argc, argv), fetch
+ * lines one at a time, split each into words with breakline() and pass
+ * non-empty ones to command().  The loop ends when command() returns
+ * non-zero or fetchline() returns NULL.  Returns the global exitcode.
+ */
+int
+main(
+       int     argc,
+       char    **argv)
+{
+       char    *line;
+       int     nwords;
+       int     quit = 0;
+       char    **words;
+
+       pushfile(stdin);
+       init(argc, argv);
+       while (!quit && (line = fetchline()) != NULL) {
+               words = breakline(line, &nwords);
+               /* a line with no words is not dispatched, but is released */
+               if (nwords != 0)
+                       quit = command(nwords, words);
+               doneline(line, words);
+       }
+       return exitcode;
+}
diff --git a/db/malloc.c b/db/malloc.c
new file mode 100644 (file)
index 0000000..413b87f
--- /dev/null
+++ b/db/malloc.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "init.h"
+#include "malloc.h"
+#include "output.h"
+
+/*
+ * Common allocation-failure path: report "<progname>: out of memory" through
+ * dbprintf() and terminate the process with exit status 4.
+ */
+static void
+badmalloc(void)
+{
+       dbprintf("%s: out of memory\n", progname);
+       exit(4);
+}
+
+/*
+ * calloc() wrapper that does not report failure to its caller: any NULL
+ * result from calloc() is routed to badmalloc(), which exits.
+ */
+void *
+xcalloc(
+       size_t  nelem,
+       size_t  elsize)
+{
+       void    *mem = calloc(nelem, elsize);
+
+       if (mem == NULL)
+               badmalloc();    /* exits */
+       return mem;
+}
+
+/* Release memory obtained from the x*() allocators; a plain free(). */
+void
+xfree(
+       void    *ptr)
+{
+       free(ptr);
+}
+
+/*
+ * malloc() wrapper that does not report failure to its caller: any NULL
+ * result from malloc() is routed to badmalloc(), which exits.
+ */
+void *
+xmalloc(
+       size_t  size)
+{
+       void    *mem = malloc(size);
+
+       if (mem == NULL)
+               badmalloc();    /* exits */
+       return mem;
+}
+
+/*
+ * realloc() wrapper.  A NULL result is passed back only when size is zero;
+ * for any other size it is treated as out of memory and badmalloc() exits.
+ */
+void *
+xrealloc(
+       void    *ptr,
+       size_t  size)
+{
+       void    *mem = realloc(ptr, size);
+
+       if (mem == NULL && size != 0)
+               badmalloc();    /* exits */
+       return mem;
+}
+
+/*
+ * strdup() wrapper that does not report failure to its caller: a NULL
+ * result from strdup() is routed to badmalloc(), which exits.
+ */
+char *
+xstrdup(
+       const char      *s1)
+{
+       char            *copy = strdup(s1);
+
+       if (copy == NULL)
+               badmalloc();    /* exits */
+       return copy;
+}
diff --git a/db/malloc.h b/db/malloc.h
new file mode 100644 (file)
index 0000000..1680a44
--- /dev/null
+++ b/db/malloc.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Allocation wrappers defined in db/malloc.c.  On allocation failure the
+ * allocating variants print "out of memory" and exit(4) rather than return
+ * NULL (xrealloc may still return NULL for a zero size).  xfree() is free().
+ */
+extern void    *xcalloc(size_t nelem, size_t elsize);
+extern void    xfree(void *ptr);
+extern void    *xmalloc(size_t size);
+extern void    *xrealloc(void *ptr, size_t size);
+extern char    *xstrdup(const char *s1);
diff --git a/db/mount.c b/db/mount.c
new file mode 100644 (file)
index 0000000..1849727
--- /dev/null
+++ b/db/mount.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "init.h"
+#include "io.h"
+#include "mount.h"
+#include "malloc.h"
+#include "data.h"
+
+/*
+ * Global mount structure pointer, exported through mount.h.  Nothing in this
+ * file assigns it: compute_maxlevels() takes a parameter of the same name and
+ * dbmount() works on a local of the same name, returning it to the caller.
+ */
+xfs_mount_t    *mp;
+
+/*
+ * Compute the maximum number of bmap btree levels for one inode fork and
+ * store it in mp->m_bm_maxlevels[whichfork].
+ *
+ * Starts from the largest extent count for the fork (MAXEXTNUM for
+ * XFS_DATA_FORK, MAXAEXTNUM otherwise), divides by the minimum records per
+ * leaf block, and then keeps dividing by the minimum records per node block,
+ * counting one level per step, until the remaining blocks fit in the root
+ * (whose capacity is derived from the inode size).
+ */
+static void
+compute_maxlevels(
+       xfs_mount_t     *mp,
+       int             whichfork)
+{
+       int             inosize;
+       int             leafmin;
+       uint            nblocks;
+       uint            nextents;
+       int             nlevels;
+       int             nodemin;
+       int             rootmax;
+
+       nextents = (whichfork == XFS_DATA_FORK) ? MAXEXTNUM : MAXAEXTNUM;
+       leafmin = mp->m_bmap_dmnr[0];
+       nodemin = mp->m_bmap_dmnr[1];
+       inosize = mp->m_sb.sb_inodesize;
+       rootmax = (int)XFS_BTREE_BLOCK_MAXRECS(inosize, xfs_bmdr, 0);
+       nblocks = (nextents + leafmin - 1) / leafmin;
+       nlevels = 1;
+       while (nblocks > 1) {
+               /* one more level is needed above the current set of blocks */
+               if (nblocks > rootmax)
+                       nblocks = (nblocks + nodemin - 1) / nodemin;
+               else
+                       nblocks = 1;
+               nlevels++;
+       }
+       mp->m_bm_maxlevels[whichfork] = nlevels;
+}
+
+/*
+ * Read the superblock at XFS_SB_DADDR and build an in-core mount structure
+ * from it.
+ *
+ * Returns a newly allocated xfs_mount_t with m_sb converted from the on-disk
+ * buffer and the derived fields below filled in (log2 shifts, block masks,
+ * btree record limits, bmap btree max levels, inode allocation chunk sizes,
+ * realtime summary sizes, directory geometry).  Returns NULL if the
+ * superblock cannot be read; the partially built structure is freed first.
+ *
+ * An unexpected magic number is reported on stderr, but setup continues with
+ * whatever values the superblock contains.
+ */
+xfs_mount_t *
+dbmount(void)
+{
+       void            *bufp;
+       int             i;
+       xfs_mount_t     *mp;
+       xfs_sb_t        *sbp;
+
+       mp = xcalloc(1, sizeof(*mp));
+       bufp = NULL;
+       if (read_bbs(XFS_SB_DADDR, 1, &bufp, NULL)) {
+               /* nothing else references the new mount yet; don't leak it */
+               xfree(mp);
+               return NULL;
+       }
+
+       /* copy sb from buf to in-core, converting architecture */
+       libxfs_xlate_sb(bufp, &mp->m_sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+       xfree(bufp);
+       sbp = &mp->m_sb;
+       if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+               /* warn only: setup carries on with the values as read */
+               fprintf(stderr,"%s: unexpected XFS SB magic number 0x%08x\n",
+                       progname, sbp->sb_magicnum);
+       }
+       mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+       mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+       mp->m_agno_log = libxfs_highbit32(sbp->sb_agcount - 1) + 1;
+       mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+       mp->m_litino =
+               (int)(sbp->sb_inodesize -
+                     (sizeof(xfs_dinode_core_t) + sizeof(xfs_agino_t)));
+       mp->m_blockmask = sbp->sb_blocksize - 1;
+       mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+       mp->m_blockwmask = mp->m_blockwsize - 1;
+       /* index 0 is computed with the macros' last argument set, index 1 clear */
+       for (i = 0; i < 2; i++) {
+               mp->m_alloc_mxr[i] =
+                       (uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                               xfs_alloc, i == 0);
+               mp->m_alloc_mnr[i] =
+                       (uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                               xfs_alloc, i == 0);
+               mp->m_bmap_dmxr[i] =
+                       (uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                               xfs_bmbt, i == 0);
+               mp->m_bmap_dmnr[i] =
+                       (uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                               xfs_bmbt, i == 0);
+               mp->m_inobt_mxr[i] =
+                       (uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                               xfs_inobt, i == 0);
+               mp->m_inobt_mnr[i] =
+                       (uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                               xfs_inobt, i == 0);
+       }
+       /* needs m_bmap_dmnr[], which the loop above has just set */
+       compute_maxlevels(mp, XFS_DATA_FORK);
+       compute_maxlevels(mp, XFS_ATTR_FORK);
+       mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+       mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock);
+       mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+       if (sbp->sb_rblocks) {
+               mp->m_rsumlevels = sbp->sb_rextslog + 1;
+               mp->m_rsumsize =
+                       (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
+                       sbp->sb_rbmblocks;
+               /* skip the rounding when the block size is zero */
+               if (sbp->sb_blocksize)
+                       mp->m_rsumsize =
+                               roundup(mp->m_rsumsize, sbp->sb_blocksize);
+       }
+       if (XFS_SB_VERSION_HASDIRV2(sbp)) {
+               mp->m_dirversion = 2;
+               mp->m_dirblksize =
+                       1 << (sbp->sb_dirblklog + sbp->sb_blocklog);
+               mp->m_dirblkfsbs = 1 << sbp->sb_dirblklog;
+               mp->m_dirdatablk =
+                       XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+               mp->m_dirleafblk =
+                       XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+               mp->m_dirfreeblk =
+                       XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+       } else {
+               mp->m_dirversion = 1;
+               mp->m_dirblksize = sbp->sb_blocksize;
+               mp->m_dirblkfsbs = 1;
+       }
+       return mp;
+}
diff --git a/db/mount.h b/db/mount.h
new file mode 100644 (file)
index 0000000..72348ad
--- /dev/null
+++ b/db/mount.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* Build a mount structure from the on-disk superblock; NULL if unreadable. */
+extern xfs_mount_t     *dbmount(void);
+/* Global mount pointer, defined in db/mount.c. */
+extern xfs_mount_t     *mp;
diff --git a/db/output.c b/db/output.c
new file mode 100644 (file)
index 0000000..b49b365
--- /dev/null
+++ b/db/output.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <stdarg.h>
+#include "command.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+#include "init.h"
+
+static int     log_f(int argc, char **argv);
+
+/* Command table entry for "log"; registered by output_init(). */
+static const cmdinfo_t log_cmd =
+       { "log", NULL, log_f, 0, 2, 0, "[stop|start <filename>]",
+         "start or stop logging to a file", NULL };
+
+/* When non-zero, dbprintf() prefixes its stdout output with "<fsdevice>: ". */
+int            dbprefix;
+/* Open log stream, or NULL while logging is off. */
+static FILE    *log_file;
+/* Name reported by the "log" command; allocated by xstrdup() in log_f(). */
+static char    *log_file_name;
+
+/*
+ * printf-style output for the debugger.
+ *
+ * Prints nothing and returns 0 if seenint() reports true.  Otherwise writes
+ * the formatted text to stdout between blockint() and unblockint(), preceded
+ * by "<fsdevice>: " when dbprefix is set.  If a log file is open, the same
+ * formatted text (without the prefix) is also written to it.
+ *
+ * Returns the sum of the printf()/vprintf() return values for stdout.
+ */
+int
+dbprintf(const char *fmt, ...)
+{
+       va_list args;
+       int     nout = 0;
+
+       if (seenint())
+               return 0;
+       va_start(args, fmt);
+       blockint();
+       if (dbprefix)
+               nout = printf("%s: ", fsdevice);
+       nout += vprintf(fmt, args);
+       unblockint();
+       va_end(args);
+       if (!log_file)
+               return nout;
+       /* vprintf() consumed the list, so start it again for the log copy */
+       va_start(args, fmt);
+       vfprintf(log_file, fmt, args);
+       va_end(args);
+       return nout;
+}
+
+/*
+ * Handler for the "log" command.
+ *
+ *   log                    report whether a log file is open, and its name
+ *   log stop               close the current log file
+ *   log start <filename>   open <filename> for appending and log to it
+ *
+ * Any other form is rejected with a message.  Always returns 0.
+ */
+static int
+log_f(
+       int             argc,
+       char            **argv)
+{
+       if (argc == 1) {
+               if (log_file)
+                       dbprintf("logging to %s\n", log_file_name);
+               else
+                       dbprintf("no log file\n");
+       } else if (argc == 2 && strcmp(argv[1], "stop") == 0) {
+               if (log_file) {
+                       xfree(log_file_name);
+                       /* don't keep a pointer to the freed name around */
+                       log_file_name = NULL;
+                       fclose(log_file);
+                       log_file = NULL;
+               } else
+                       dbprintf("no log file\n");
+       } else if (argc == 3 && strcmp(argv[1], "start") == 0) {
+               if (log_file)
+                       dbprintf("already logging to %s\n", log_file_name);
+               else {
+                       log_file = fopen(argv[2], "a");
+                       if (log_file == NULL)
+                               dbprintf("can't open %s for writing\n",
+                                       argv[2]);
+                       else
+                               /* argv[2] is the file; argv[1] is "start" */
+                               log_file_name = xstrdup(argv[2]);
+               }
+       } else
+               dbprintf("bad log command, ignored\n");
+       return 0;
+}
+
+/*
+ * printf-style output that goes only to the log file.  Does nothing when no
+ * log file is open.
+ */
+void
+logprintf(const char *fmt, ...)
+{
+       va_list args;
+
+       if (!log_file)
+               return;
+       va_start(args, fmt);
+       (void)vfprintf(log_file, fmt, args);
+       va_end(args);
+}
+
+/* Register the "log" command with the command table. */
+void
+output_init(void)
+{
+       add_command(&log_cmd);
+}
diff --git a/db/output.h b/db/output.h
new file mode 100644 (file)
index 0000000..27861ff
--- /dev/null
+++ b/db/output.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* When non-zero, dbprintf() prefixes stdout output with the device name. */
+extern int     dbprefix;
+
+/* printf-style output to stdout and, if one is open, to the log file. */
+extern int     dbprintf(const char *, ...);
+/* printf-style output to the log file only; no-op when none is open. */
+extern void    logprintf(const char *, ...);
+/* Registers the "log" command. */
+extern void    output_init(void);
diff --git a/db/print.c b/db/print.c
new file mode 100644 (file)
index 0000000..f4c7479
--- /dev/null
+++ b/db/print.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "flist.h"
+#include "strvec.h"
+#include "output.h"
+#include "sig.h"
+#include "write.h"
+
+/* Forward declarations for the file-local helpers defined below. */
+static void    print_allfields(const struct field *fields);
+static int     print_f(int argc, char **argv);
+static void    print_flist_1(struct flist *flist, char **pfx, int parentoff);
+static void    print_somefields(const struct field *fields, int argc,
+                                char **argv);
+
+/* Command table entry for "print" (alias "p"); registered by print_init(). */
+static const cmdinfo_t print_cmd =
+       { "print", "p", print_f, 0, -1, 0, "[value]...",
+         "print field values", NULL };
+
+/*
+ * Print every field described by "fields" for the data at iocur_top->data.
+ *
+ * Builds a field list with an empty name pointing at "fields", expands it
+ * with flist_parse(), passes it to flist_print() and print_flist(), and then
+ * frees it.  In DEBUG builds the parse result is asserted to be 1; in other
+ * builds it is ignored.
+ */
+static void
+print_allfields(
+       const field_t   *fields)
+{
+       flist_t         *all;
+#ifdef DEBUG
+       int             parsed;
+#endif
+
+       all = flist_make("");
+       all->fld = fields;
+#ifdef DEBUG
+       parsed = flist_parse(fields, all, iocur_top->data, 0);
+       ASSERT(parsed == 1);
+#else
+       (void)flist_parse(fields, all, iocur_top->data, 0);
+#endif
+       flist_print(all);
+       print_flist(all);
+       flist_free(all);
+}
+
+/*
+ * Handler for the "print" command.  Complains if there is no current type or
+ * the current type has no print function; otherwise calls that function with
+ * DB_READ, the type's field table and the arguments that follow the command
+ * name.  Always returns 0.
+ */
+static int
+print_f(
+       int     argc,
+       char    **argv)
+{
+       pfunc_t printer;
+
+       if (cur_typ == NULL) {
+               dbprintf("no current type\n");
+               return 0;
+       }
+       printer = cur_typ->pfunc;
+       if (printer != NULL) {
+               /* hand over the arguments without the command name */
+               (*printer)(DB_READ, cur_typ->fields, argc - 1, argv + 1);
+               return 0;
+       }
+       dbprintf("no print function for type %s\n", cur_typ->name);
+       return 0;
+}
+
+/*
+ * Print the values of every entry in a field list.  Starts the recursive
+ * worker print_flist_1() with an empty name prefix and parent offset 0.
+ */
+void
+print_flist(
+       flist_t *flist)
+{
+       char    **pfx;
+
+       pfx = new_strvec(0);
+       print_flist_1(flist, pfx, 0);
+       free_strvec(pfx);
+}
+
+/*
+ * Recursive worker for print_flist().
+ *
+ * Walks "flist" and its siblings, stopping early once seenint() reports
+ * true.  For each entry the name prefix "ppfx" is copied and extended with
+ * the entry's name and, when FL_OKLOW is set, an index suffix "[low]" or
+ * "[low-high]".  Entries with children recurse with "." appended after a
+ * non-empty name and the entry's offset as the new parent offset.  Entries
+ * without children print "<prefix> = " followed by the value from the field
+ * type's print function, or "(empty)" when the type has none.
+ *
+ * flist     - first entry of the list to print
+ * ppfx      - string vector with the name prefix built so far (not modified)
+ * parentoff - offset passed to fcount()/fsize() for entries on this level
+ */
+static void
+print_flist_1(
+       flist_t         *flist,
+       char            **ppfx,
+       int             parentoff)
+{
+       const ftattr_t  *attr;
+       flist_t         *cur;
+       int             first;
+       const field_t   *fld;
+       char            numbuf[16];
+       char            **path;
+
+       for (cur = flist; cur && !seenint(); cur = cur->sibling) {
+               path = copy_strvec(ppfx);
+               if (cur->name[0])
+                       add_strvec(&path, cur->name);
+               if (cur->flags & FL_OKLOW) {
+                       add_strvec(&path, "[");
+                       sprintf(numbuf, "%d", cur->low);
+                       add_strvec(&path, numbuf);
+                       if (cur->low != cur->high) {
+                               add_strvec(&path, "-");
+                               sprintf(numbuf, "%d", cur->high);
+                               add_strvec(&path, numbuf);
+                       }
+                       add_strvec(&path, "]");
+               }
+               if (cur->child) {
+                       /* interior entry: descend with the extended prefix */
+                       if (cur->name[0])
+                               add_strvec(&path, ".");
+                       print_flist_1(cur->child, path, cur->offset);
+                       free_strvec(path);
+                       continue;
+               }
+               fld = cur->fld;
+               attr = &ftattrtab[fld->ftyp];
+               ASSERT(attr->ftyp == fld->ftyp);
+               print_strvec(path);
+               dbprintf(" = ");
+               first = (cur->flags & FL_OKLOW) ? cur->low : 0;
+               if (!attr->prfunc) {
+                       ASSERT(attr->arg & FTARG_OKEMPTY);
+                       dbprintf("(empty)\n");
+               } else if (attr->prfunc(iocur_top->data, cur->offset,
+                               fcount(fld, iocur_top->data, parentoff),
+                               attr->fmtstr,
+                               fsize(fld, iocur_top->data, parentoff, 0),
+                               attr->arg, first,
+                               (fld->flags & FLD_ARRAY) != 0)) {
+                       /* non-zero return: the print function wants a newline */
+                       dbprintf("\n");
+               }
+               free_strvec(path);
+       }
+}
+
+/* Register the "print" command with the command table. */
+void
+print_init(void)
+{
+       add_command(&print_cmd);
+}
+
+/*
+ * Print "count" consecutive records of "obj", one bracketed, comma-separated
+ * group of field values per record, with groups separated by a space.
+ *
+ * obj     - base of the data the field print functions read from
+ * bit     - starting offset of the first record (asserted to satisfy
+ *           bitoffs(bit) == 0); advanced by "size" for each record
+ * count   - number of records to print
+ * size    - distance between successive records, in the same units as "bit"
+ * base    - index of the first record, used for labels and field lookups
+ * array   - when non-zero, each group is preceded by "<index>:"
+ * flds    - field table describing one record; ends at an entry whose name
+ *           is NULL, and entries flagged FLD_SKIPALL are left out
+ * skipnms - when zero, a "[name,name,...] " header line-prefix of the field
+ *           names is printed first
+ *
+ * The record loop stops early once seenint() reports true.
+ */
+void
+print_sarray(
+       void            *obj,
+       int             bit,
+       int             count,
+       int             size,
+       int             base,
+       int             array,
+       const field_t   *flds,
+       int             skipnms)
+{
+       int             bitoff;
+       const field_t   *f;
+       const ftattr_t  *fa;
+       int             first;
+       int             i;
+
+       ASSERT(bitoffs(bit) == 0);
+       /* optional header listing the field names */
+       if (skipnms == 0) {
+               for (f = flds, first = 1; f->name; f++) {
+                       if (f->flags & FLD_SKIPALL)
+                               continue;
+                       dbprintf("%c%s", first ? '[' : ',', f->name);
+                       first = 0;
+               }
+               dbprintf("] ");
+       }
+       /* one bracketed group per record */
+       for (i = 0, bitoff = bit;
+            i < count && !seenint();
+            i++, bitoff += size) {
+               if (array)
+                       dbprintf("%d:", i + base);
+               for (f = flds, first = 1; f->name; f++) {
+                       if (f->flags & FLD_SKIPALL)
+                               continue;
+                       fa = &ftattrtab[f->ftyp];
+                       ASSERT(fa->ftyp == f->ftyp);
+                       dbprintf("%c", first ? '[' : ',');
+                       first = 0;
+                       /* NOTE(review): FLD_ARRAY is passed as a raw mask here
+                        * but as 0/1 in print_flist_1() - confirm prfunc only
+                        * tests it for non-zero */
+                       if (fa->prfunc)
+                               fa->prfunc(obj,
+                                       bitoff +
+                                           bitoffset(f, obj, bitoff, i + base),
+                                       fcount(f, obj, bitoff), fa->fmtstr,
+                                       fsize(f, obj, bitoff, i + base),
+                                       fa->arg, (f->flags & FLD_ABASE1) != 0,
+                                       f->flags & FLD_ARRAY);
+                       else {
+                               ASSERT(fa->arg & FTARG_OKEMPTY);
+                               dbprintf("(empty)");
+                       }
+               }
+               dbprintf("]");
+               /* separator between groups, but not after the last one */
+               if (i < count - 1)
+                       dbprintf(" ");
+       }
+}
+
+/*
+ * Print only the fields named on the command line.
+ *
+ * Each argument is turned into a field list by flist_scan() and the results
+ * are chained through their sibling pointers in argument order.  If any
+ * argument fails to scan, the lists built so far are freed and nothing is
+ * printed.  When the first entry of "fields" has an empty name, the sub-field
+ * table of its field type is used in its place.  The chain is then resolved
+ * with flist_parse() and, if that succeeds, printed; it is freed either way.
+ */
+static void
+print_somefields(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       const ftattr_t  *attr;
+       flist_t         *head = NULL;
+       int             n;
+       flist_t         *scanned;
+       flist_t         *tail = NULL;
+
+       for (n = 0; n < argc; n++) {
+               scanned = flist_scan(argv[n]);
+               if (scanned == NULL) {
+                       if (head)
+                               flist_free(head);
+                       return;
+               }
+               /* append to the chain, keeping argument order */
+               if (tail == NULL)
+                       head = scanned;
+               else
+                       tail->sibling = scanned;
+               tail = scanned;
+       }
+       if (fields->name[0] == '\0') {
+               attr = &ftattrtab[fields->ftyp];
+               ASSERT(attr->ftyp == fields->ftyp);
+               fields = attr->subfld;
+       }
+       if (flist_parse(fields, head, iocur_top->data, 0)) {
+               flist_print(head);
+               print_flist(head);
+       }
+       flist_free(head);
+}
+
+/*
+ * Print the current data as a double-quoted string followed by a newline.
+ * Characters are taken from iocur_top->data until a NUL byte, the end of the
+ * data (iocur_top->len bytes) or an interrupt reported by seenint().
+ * Passing arguments produces a "no arguments allowed" message, after which
+ * the string is still printed.  "fields" and "argv" are not used.
+ */
+/*ARGSUSED*/
+void
+print_string(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       char            *end;
+       char            *p;
+
+       if (argc != 0)
+               dbprintf("no arguments allowed\n");
+       dbprintf("\"");
+       p = iocur_top->data;
+       end = p + iocur_top->len;
+       while (p < end && *p && !seenint()) {
+               dbprintf("%c", *p);
+               p++;
+       }
+       dbprintf("\"\n");
+}
+
+/*
+ * Print fields of the current data: all of them when no arguments are given,
+ * otherwise only the ones named in argv.
+ */
+void
+print_struct(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       if (argc != 0) {
+               print_somefields(fields, argc, argv);
+               return;
+       }
+       print_allfields(fields);
+}
diff --git a/db/print.h b/db/print.h
new file mode 100644 (file)
index 0000000..81ae2c0
--- /dev/null
+++ b/db/print.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Forward declarations so that this header does not depend on field.h or
+ * flist.h having been included first.
+ */
+struct field;
+struct flist;
+
+/* Print the values of every entry in a field list. */
+extern void    print_flist(struct flist *flist);
+/* Register the "print" command. */
+extern void    print_init(void);
+/*
+ * Print "count" records of "obj" as bracketed groups of field values.
+ * flds is spelled "struct field" (the type behind field_t) to match the
+ * forward declaration above.
+ */
+extern void    print_sarray(void *obj, int bit, int count, int size, int base,
+                            int array, const struct field *flds, int skipnms);
+/* Print all fields (argc == 0) or only the fields named in argv. */
+extern void    print_struct(const struct field *fields, int argc, char **argv);
+/* Print the current data as a double-quoted string. */
+extern void    print_string(const struct field *fields, int argc, char **argv);
diff --git a/db/quit.c b/db/quit.c
new file mode 100644 (file)
index 0000000..1a93178
--- /dev/null
+++ b/db/quit.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "quit.h"
+
+static int     quit_f(int argc, char **argv);  /* handler referenced by quit_cmd below */
+
+static const cmdinfo_t quit_cmd =      /* "quit" command descriptor; NOTE(review): member order comes from cmdinfo_t in command.h, "q" is presumably the short alias -- confirm */
+       { "quit", "q", quit_f, 0, 0, 0, NULL,
+         "exit xfs_db", NULL };
+
+static int
+quit_f(
+       int     argc,           /* unused */
+       char    **argv)         /* unused */
+{
+       return 1;               /* NOTE(review): the other handlers here return 0; presumably nonzero ends the command loop -- confirm in command.c */
+}
+
+void
+quit_init(void)
+{
+       add_command(&quit_cmd);         /* hand the "quit" descriptor to add_command() */
+}
diff --git a/db/quit.h b/db/quit.h
new file mode 100644 (file)
index 0000000..0e3e50d
--- /dev/null
+++ b/db/quit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void    quit_init(void);        /* passes the "quit" command descriptor to add_command() */
diff --git a/db/sb.c b/db/sb.c
new file mode 100644 (file)
index 0000000..dc40b7d
--- /dev/null
+++ b/db/sb.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "sb.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int     sb_f(int argc, char **argv);    /* "sb" command handler */
+static void     sb_help(void);                 /* prints the long help text for "sb" */
+
+static const cmdinfo_t sb_cmd =                /* "sb [agno]" command descriptor; member order is defined by cmdinfo_t in command.h */
+       { "sb", NULL, sb_f, 0, 1, 1, "[agno]",
+         "set current address to sb header", sb_help };
+
+const field_t  sb_hfld[] = {           /* one unnamed FLDT_SB entry; this is the field list typtab[] uses for TYP_SB */
+       { "", FLDT_SB, OI(0), C1, 0, TYP_NONE },
+       { NULL }                        /* terminator */
+};
+
+#define        OFF(f)  bitize(offsetof(xfs_sb_t, sb_ ## f))   /* offsetof member sb_<f> in xfs_sb_t, passed through bitize() */
+#define        SZC(f)  szcount(xfs_sb_t, sb_ ## f)            /* element count of array member sb_<f> (see szcount in type.h) */
+const field_t  sb_flds[] = {           /* one entry per displayed xfs_sb_t member: name, field type, offset, count, flags, target type */
+       { "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+       { "blocksize", FLDT_UINT32D, OI(OFF(blocksize)), C1, 0, TYP_NONE },
+       { "dblocks", FLDT_DRFSBNO, OI(OFF(dblocks)), C1, 0, TYP_NONE },
+       { "rblocks", FLDT_DRFSBNO, OI(OFF(rblocks)), C1, 0, TYP_NONE },
+       { "rextents", FLDT_DRTBNO, OI(OFF(rextents)), C1, 0, TYP_NONE },
+       { "uuid", FLDT_UUID, OI(OFF(uuid)), C1, 0, TYP_NONE },
+       { "logstart", FLDT_DFSBNO, OI(OFF(logstart)), C1, 0, TYP_LOG },
+       { "rootino", FLDT_INO, OI(OFF(rootino)), C1, 0, TYP_INODE },
+       { "rbmino", FLDT_INO, OI(OFF(rbmino)), C1, 0, TYP_INODE },
+       { "rsumino", FLDT_INO, OI(OFF(rsumino)), C1, 0, TYP_INODE },
+       { "rextsize", FLDT_AGBLOCK, OI(OFF(rextsize)), C1, 0, TYP_NONE },
+       { "agblocks", FLDT_AGBLOCK, OI(OFF(agblocks)), C1, 0, TYP_NONE },
+       { "agcount", FLDT_AGNUMBER, OI(OFF(agcount)), C1, 0, TYP_NONE },
+       { "rbmblocks", FLDT_EXTLEN, OI(OFF(rbmblocks)), C1, 0, TYP_NONE },
+       { "logblocks", FLDT_EXTLEN, OI(OFF(logblocks)), C1, 0, TYP_NONE },
+       { "versionnum", FLDT_UINT16X, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+       { "sectsize", FLDT_UINT16D, OI(OFF(sectsize)), C1, 0, TYP_NONE },
+       { "inodesize", FLDT_UINT16D, OI(OFF(inodesize)), C1, 0, TYP_NONE },
+       { "inopblock", FLDT_UINT16D, OI(OFF(inopblock)), C1, 0, TYP_NONE },
+       { "fname", FLDT_CHARNS, OI(OFF(fname)), CI(SZC(fname)), 0, TYP_NONE },  /* the only array-valued entry */
+       { "blocklog", FLDT_UINT8D, OI(OFF(blocklog)), C1, 0, TYP_NONE },
+       { "sectlog", FLDT_UINT8D, OI(OFF(sectlog)), C1, 0, TYP_NONE },
+       { "inodelog", FLDT_UINT8D, OI(OFF(inodelog)), C1, 0, TYP_NONE },
+       { "inopblog", FLDT_UINT8D, OI(OFF(inopblog)), C1, 0, TYP_NONE },
+       { "agblklog", FLDT_UINT8D, OI(OFF(agblklog)), C1, 0, TYP_NONE },
+       { "rextslog", FLDT_UINT8D, OI(OFF(rextslog)), C1, 0, TYP_NONE },
+       { "inprogress", FLDT_UINT8D, OI(OFF(inprogress)), C1, 0, TYP_NONE },
+       { "imax_pct", FLDT_UINT8D, OI(OFF(imax_pct)), C1, 0, TYP_NONE },
+       { "icount", FLDT_UINT64D, OI(OFF(icount)), C1, 0, TYP_NONE },
+       { "ifree", FLDT_UINT64D, OI(OFF(ifree)), C1, 0, TYP_NONE },
+       { "fdblocks", FLDT_UINT64D, OI(OFF(fdblocks)), C1, 0, TYP_NONE },
+       { "frextents", FLDT_UINT64D, OI(OFF(frextents)), C1, 0, TYP_NONE },
+       { "uquotino", FLDT_INO, OI(OFF(uquotino)), C1, 0, TYP_INODE },
+       { "pquotino", FLDT_INO, OI(OFF(pquotino)), C1, 0, TYP_INODE },
+       { "qflags", FLDT_UINT16X, OI(OFF(qflags)), C1, 0, TYP_NONE },
+       { "flags", FLDT_UINT8X, OI(OFF(flags)), C1, 0, TYP_NONE },
+       { "shared_vn", FLDT_UINT8D, OI(OFF(shared_vn)), C1, 0, TYP_NONE },
+       { "inoalignmt", FLDT_EXTLEN, OI(OFF(inoalignmt)), C1, 0, TYP_NONE },
+       { "unit", FLDT_UINT32D, OI(OFF(unit)), C1, 0, TYP_NONE },
+       { "width", FLDT_UINT32D, OI(OFF(width)), C1, 0, TYP_NONE },
+       { "dirblklog", FLDT_UINT8D, OI(OFF(dirblklog)), C1, 0, TYP_NONE },
+       { NULL }                        /* terminator */
+};
+
+static void
+sb_help(void)                          /* help callback of sb_cmd: emits the text below through dbprintf() */
+{
+       dbprintf(
+"\n"
+" set allocation group superblock\n"
+"\n"
+" Example:\n"
+"\n"
+" 'sb 7' - set location to 7th allocation group superblock, set type to 'sb'\n"
+"\n"
+" Located in the 1st 512 byte block of each allocation group,\n"
+" the superblock contains the base information for the filesystem.\n"
+" The superblock in allocation group 0 is the primary.  The copies in the\n"
+" remaining allocation groups only serve as backup for filesystem recovery.\n"
+" The icount/ifree/fdblocks/frextents are only updated in superblock 0.\n"
+"\n"
+);
+}
+
+/*
+ * "sb [agno]" command: make the superblock of an allocation group the
+ * current object.
+ *
+ * With an argument, argv[1] is parsed as the allocation group number (in
+ * any base strtoul accepts) and must be a complete number smaller than
+ * sb_agcount; otherwise a message is printed and nothing is changed.
+ * Without an argument the current AG is kept, falling back to AG 0 when
+ * cur_agno is NULLAGNUMBER.
+ *
+ * Always returns 0.
+ */
+static int
+sb_f(
+       int             argc,
+       char            **argv)
+{
+       unsigned long   val;
+       char            *p;
+
+       if (argc > 1) {
+               /*
+                * Range-check the value at full width before narrowing it
+                * to xfs_agnumber_t, so that an oversized number cannot
+                * wrap around to a valid AG.  Also reject an argument that
+                * contains no digits (strtoul then leaves p == argv[1]).
+                */
+               val = strtoul(argv[1], &p, 0);
+               if (p == argv[1] || *p != '\0' ||
+                   val >= mp->m_sb.sb_agcount) {
+                       dbprintf("bad allocation group number %s\n", argv[1]);
+                       return 0;
+               }
+               cur_agno = (xfs_agnumber_t)val;
+       } else if (cur_agno == NULLAGNUMBER)
+               cur_agno = 0;
+       /* typtab[] is indexed by type number; make sure TYP_SB is in place */
+       ASSERT(typtab[TYP_SB].typnm == TYP_SB);
+       set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, cur_agno, XFS_SB_DADDR), 1,
+               DB_RING_ADD, NULL);
+       return 0;
+}
+
+void
+sb_init(void)
+{
+       add_command(&sb_cmd);           /* hand the "sb" descriptor to add_command() */
+}
+
+/*ARGSUSED*/
+int
+sb_size(
+       void    *obj,           /* unused */
+       int     startoff,       /* unused */
+       int     idx)            /* unused */
+{
+       return bitize(mp->m_sb.sb_sectsize);    /* sector size from the mounted superblock, converted by bitize() */
+}
diff --git a/db/sb.h b/db/sb.h
new file mode 100644 (file)
index 0000000..5d646ce
--- /dev/null
+++ b/db/sb.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;                                  /* forward declaration */
+
+extern const struct field      sb_flds[];      /* superblock member table, defined in sb.c */
+extern const struct field      sb_hfld[];      /* one-entry header table, defined in sb.c */
+
+extern void    sb_init(void);                  /* passes the "sb" command descriptor to add_command() */
+extern int     sb_size(void *obj, int startoff, int idx);      /* returns bitize(sb_sectsize); all arguments are ignored */
diff --git a/db/sig.c b/db/sig.c
new file mode 100644 (file)
index 0000000..9b70ced
--- /dev/null
+++ b/db/sig.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <signal.h>
+#include "sig.h"
+
+/*
+ * Set by the SIGINT handler and read back through seenint().  An object
+ * stored to from a signal handler has to be volatile sig_atomic_t for the
+ * store to be well defined and visible to the interrupted code.
+ */
+static volatile sig_atomic_t   gotintr;
+/* Signal set containing only SIGINT; filled in by init_sig(). */
+static sigset_t        intrset;
+
+static void
+interrupt(int sig, siginfo_t *info, void *uc)  /* SIGINT handler installed by init_sig(); its arguments are ignored */
+{
+       gotintr = 1;            /* only record the event; seenint() reports it later */
+}
+
+void
+blockint(void)
+{
+       sigprocmask(SIG_BLOCK, &intrset, NULL);        /* add intrset (SIGINT) to the blocked signals */
+}
+
+void
+clearint(void)
+{
+       gotintr = 0;            /* forget any SIGINT recorded so far */
+}
+
+/*
+ * Install interrupt() as the SIGINT handler and build the signal set
+ * (SIGINT only) that blockint() and unblockint() operate on.
+ */
+void
+init_sig(void)
+{
+       struct sigaction sa;
+
+       memset(&sa, 0, sizeof(sa));
+       /*
+        * The handler is registered through sa_sigaction, which is only
+        * consulted when SA_SIGINFO is set; without the flag sa_handler
+        * is used instead and the two merely may share storage.
+        */
+       sa.sa_flags = SA_SIGINFO;
+       sa.sa_sigaction = interrupt;
+       sigaction(SIGINT, &sa, NULL);
+       sigemptyset(&intrset);
+       sigaddset(&intrset, SIGINT);
+}
+
+int
+seenint(void)
+{
+       return gotintr;         /* nonzero once interrupt() has run since the last clearint() */
+}
+
+void
+unblockint(void)
+{
+       sigprocmask(SIG_UNBLOCK, &intrset, NULL);      /* remove intrset (SIGINT) from the blocked signals */
+}
diff --git a/db/sig.h b/db/sig.h
new file mode 100644 (file)
index 0000000..8bea247
--- /dev/null
+++ b/db/sig.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void    blockint(void);         /* block SIGINT via sigprocmask() */
+extern void    clearint(void);         /* reset the "SIGINT seen" flag */
+extern void    init_sig(void);         /* install the SIGINT handler and set up the SIGINT signal set */
+extern int     seenint(void);          /* returns the "SIGINT seen" flag */
+extern void    unblockint(void);       /* unblock SIGINT via sigprocmask() */
diff --git a/db/strvec.c b/db/strvec.c
new file mode 100644 (file)
index 0000000..d346188
--- /dev/null
+++ b/db/strvec.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "strvec.h"
+#include "output.h"
+#include "malloc.h"
+
+static int     count_strvec(char **vec);       /* number of entries before the NULL terminator */
+
+/*
+ * Append a private copy of str to the NULL-terminated vector *vecp.
+ * The vector is resized to hold the extra entry plus the terminator and
+ * *vecp is updated to point at the resized storage.
+ */
+void
+add_strvec(
+       char    ***vecp,
+       char    *str)
+{
+       char    *copy = xstrdup(str);
+       int     used = count_strvec(*vecp);
+       char    **grown;
+
+       /* existing entries + the new one + the terminator */
+       grown = xrealloc(*vecp, (used + 2) * sizeof(*grown));
+       grown[used] = copy;
+       grown[used + 1] = NULL;
+       *vecp = grown;
+}
+
+/*
+ * Return a newly allocated vector holding copies of every string in the
+ * NULL-terminated vector vec.
+ */
+char **
+copy_strvec(
+       char    **vec)
+{
+       int     n = count_strvec(vec);
+       char    **dupvec = new_strvec(n);
+       int     k;
+
+       /* new_strvec() has already stored the terminator at dupvec[n] */
+       for (k = 0; k < n; k++)
+               dupvec[k] = xstrdup(vec[k]);
+       return dupvec;
+}
+
+/*
+ * Count the entries of a NULL-terminated string vector, not including
+ * the terminator itself.
+ */
+static int
+count_strvec(
+       char    **vec)
+{
+       int     n = 0;
+
+       while (vec[n] != NULL)
+               n++;
+       return n;
+}
+
+/*
+ * Release every string in the NULL-terminated vector vec, then the
+ * vector itself.
+ */
+void
+free_strvec(
+       char    **vec)
+{
+       char    **slot;
+
+       for (slot = vec; *slot != NULL; slot++)
+               xfree(*slot);
+       xfree(vec);
+}
+
+/*
+ * Allocate a vector with room for count strings plus the terminator.
+ * Only the terminator slot is initialised; the caller fills in the
+ * first count entries.
+ */
+char **
+new_strvec(
+       int     count)
+{
+       char    **vec = xmalloc((count + 1) * sizeof(char *));
+
+       vec[count] = NULL;
+       return vec;
+}
+
+/*
+ * Write every string of the NULL-terminated vector vec through
+ * dbprintf(), back to back with no separator.
+ */
+void
+print_strvec(
+       char    **vec)
+{
+       char    **cur;
+
+       for (cur = vec; *cur != NULL; cur++)
+               dbprintf("%s", *cur);
+}
diff --git a/db/strvec.h b/db/strvec.h
new file mode 100644 (file)
index 0000000..f74f379
--- /dev/null
+++ b/db/strvec.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+extern void    add_strvec(char ***vecp, char *str);    /* append a copy of str; *vecp is updated to the resized vector */
+extern char    **copy_strvec(char **vec);              /* new vector with a copy of every string in vec */
+extern void    free_strvec(char **vec);                /* free all strings, then the vector */
+extern char    **new_strvec(int count);                /* vector of count slots plus a NULL terminator; slots are left unset */
+extern void    print_strvec(char **vec);               /* dbprintf() each string with no separator */
diff --git a/db/type.c b/db/type.c
new file mode 100644 (file)
index 0000000..50381c4
--- /dev/null
+++ b/db/type.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "block.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "print.h"
+#include "sb.h"
+#include "inode.h"
+#include "bnobt.h"
+#include "cntbt.h"
+#include "inobt.h"
+#include "bmapbt.h"
+#include "bmroot.h"
+#include "agf.h"
+#include "agfl.h"
+#include "agi.h"
+#include "dir.h"
+#include "dirshort.h"
+#include "io.h"
+#include "output.h"
+#include "write.h"
+#include "attr.h"
+#include "dquot.h"
+#include "dir2.h"
+
+static const typ_t     *findtyp(char *name);           /* look a type up by name in typtab */
+static int             type_f(int argc, char **argv);  /* "type" command handler */
+
+const typ_t    *cur_typ;               /* current data type; type_f() reports "no current type" while it is NULL */
+
+static const cmdinfo_t type_cmd =      /* "type [newtype]" command descriptor; member order is defined by cmdinfo_t in command.h */
+       { "type", NULL, type_f, 0, 1, 1, "[newtype]",
+         "set/show current data type", NULL };
+
+const typ_t    typtab[] = {            /* indexed by typnm_t: findtyp() asserts that entry i has typnm == i, so keep enum order */
+       { TYP_AGF, "agf", handle_struct, agf_hfld },
+       { TYP_AGFL, "agfl", handle_struct, agfl_hfld },
+       { TYP_AGI, "agi", handle_struct, agi_hfld },
+       { TYP_ATTR, "attr", handle_struct, attr_hfld },
+       { TYP_BMAPBTA, "bmapbta", handle_struct, bmapbta_hfld },
+       { TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld },
+       { TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld },
+       { TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld },
+       { TYP_DATA, "data", handle_block, NULL },
+       { TYP_DIR, "dir", handle_struct, dir_hfld },
+       { TYP_DIR2, "dir2", handle_struct, dir2_hfld },
+       { TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld },
+       { TYP_INOBT, "inobt", handle_struct, inobt_hfld },
+       { TYP_INODATA, "inodata", NULL, NULL },         /* no handler function */
+       { TYP_INODE, "inode", handle_struct, inode_hfld },
+       { TYP_LOG, "log", NULL, NULL },                 /* no handler function */
+       { TYP_RTBITMAP, "rtbitmap", NULL, NULL },       /* no handler function */
+       { TYP_RTSUMMARY, "rtsummary", NULL, NULL },     /* no handler function */
+       { TYP_SB, "sb", handle_struct, sb_hfld },
+       { TYP_SYMLINK, "symlink", handle_string, NULL },
+       { TYP_NONE, NULL }                              /* terminator: name == NULL ends every table walk */
+};
+
+/*
+ * Look a type up by name in typtab.  Returns the matching entry, or NULL
+ * when no entry carries that name.
+ */
+static const typ_t *
+findtyp(
+       char            *name)
+{
+       int             idx;
+
+       for (idx = 0; typtab[idx].name != NULL; idx++) {
+               /* every entry must sit at the index given by its type number */
+               ASSERT(typtab[idx].typnm == (typnm_t)idx);
+               if (strcmp(typtab[idx].name, name) != 0)
+                       continue;
+               return &typtab[idx];
+       }
+       return NULL;
+}
+
+/*
+ * "type [newtype]" command.
+ *
+ * Without an argument, report the current type (if any) and list the
+ * names in typtab, breaking the line after every eighth name.  With an
+ * argument, look the name up and make it the type of the object on top
+ * of the I/O stack as well as cur_typ; an unknown name, or an object
+ * that has no type yet, only produces a message.
+ *
+ * Always returns 0.
+ */
+static int
+type_f(
+       int             argc,
+       char            **argv)
+{
+       const typ_t     *tt;
+       int             shown = 0;
+
+       if (argc != 1) {
+               tt = findtyp(argv[1]);
+               if (tt == NULL) {
+                       dbprintf("no such type %s\n", argv[1]);
+                       return 0;
+               }
+               if (iocur_top->typ == NULL) {
+                       dbprintf("no current object\n");
+                       return 0;
+               }
+               iocur_top->typ = cur_typ = tt;
+               return 0;
+       }
+
+       if (cur_typ != NULL)
+               dbprintf("current type is \"%s\"\n", cur_typ->name);
+       else
+               dbprintf("no current type\n");
+
+       dbprintf("\n supported types are:\n ");
+       for (tt = typtab; tt->name != NULL; tt++) {
+               if (tt[1].name == NULL) {
+                       /* last name: no separator, just end the line */
+                       dbprintf("%s\n", tt->name);
+                       continue;
+               }
+               dbprintf("%s, ", tt->name);
+               if (++shown % 8 == 0)
+                       dbprintf("\n ");
+       }
+       return 0;
+}
+
+void
+type_init(void)
+{
+       add_command(&type_cmd);         /* hand the "type" descriptor to add_command() */
+}
+
+/* read/write selectors for each major data type */
+
+/*
+ * Handler for structured types: DB_WRITE is forwarded to write_struct(),
+ * every other action to print_struct().
+ */
+void
+handle_struct(
+       int           action,
+       const field_t *fields,
+       int           argc,
+       char          **argv)
+{
+       if (action != DB_WRITE) {
+               print_struct(fields, argc, argv);
+               return;
+       }
+       write_struct(fields, argc, argv);
+}
+
+/*
+ * Handler for string types: DB_WRITE is forwarded to write_string(),
+ * every other action to print_string().
+ */
+void
+handle_string(
+       int           action,
+       const field_t *fields,
+       int           argc,
+       char          **argv)
+{
+       if (action != DB_WRITE) {
+               print_string(fields, argc, argv);
+               return;
+       }
+       write_string(fields, argc, argv);
+}
+
+/*
+ * Handler for raw block data: DB_WRITE is forwarded to write_block(),
+ * every other action to print_block().
+ */
+void
+handle_block(
+       int           action,
+       const field_t *fields,
+       int           argc,
+       char          **argv)
+{
+       if (action != DB_WRITE) {
+               print_block(fields, argc, argv);
+               return;
+       }
+       write_block(fields, argc, argv);
+}
diff --git a/db/type.h b/db/type.h
new file mode 100644 (file)
index 0000000..9108c8d
--- /dev/null
+++ b/db/type.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;                          /* forward declaration; only pointers to it are used in this header */
+
+#define        szof(x,y)       sizeof(((x *)0)->y)     /* size of member y of type x, without needing an object */
+#define        szcount(x,y)    (szof(x,y) / szof(x,y[0]))      /* number of elements in array member y of type x */
+
+typedef enum typnm                     /* type numbers; values double as indexes into typtab[] (asserted in type.c) */
+{
+       TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
+       TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA, TYP_DIR,
+       TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
+       TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
+       TYP_NONE                        /* last value; also marks the terminating typtab[] entry */
+} typnm_t;
+
+#define DB_WRITE 1             /* "action" value: the handle_*() functions call the write_*() routine */
+#define DB_READ  0             /* "action" value: anything other than DB_WRITE ends up in the print_*() routine */
+
+typedef void (*opfunc_t)(const struct field *fld, int argc, char **argv);      /* operation without an action selector */
+typedef void (*pfunc_t)(int action, const struct field *fld, int argc, char **argv);   /* signature of handle_struct/handle_string/handle_block */
+
+typedef struct typ
+{
+       typnm_t                 typnm;          /* type number; equals the entry's index in typtab[] */
+       char                    *name;          /* name matched by the "type" command; NULL in the terminator */
+       pfunc_t                 pfunc;          /* read/write handler; NULL for some types */
+       const struct field      *fields;        /* header field table; NULL for some types */
+} typ_t;
+extern const typ_t     typtab[], *cur_typ;     /* type table and current type, both defined in type.c */
+
+extern void    type_init(void);        /* passes the "type" command descriptor to add_command() */
+extern void    handle_block(int action, const struct field *fields, int argc,
+                            char **argv);      /* DB_WRITE -> write_block(), otherwise print_block() */
+extern void    handle_string(int action, const struct field *fields, int argc,
+                             char **argv);     /* DB_WRITE -> write_string(), otherwise print_string() */
+extern void    handle_struct(int action, const struct field *fields, int argc,
+                             char **argv);     /* DB_WRITE -> write_struct(), otherwise print_struct() */
diff --git a/db/uuid.c b/db/uuid.c
new file mode 100644 (file)
index 0000000..1c1bf32
--- /dev/null
+++ b/db/uuid.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "uuid.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+/* Command handlers and help routines for the 'uuid' and 'label' commands. */
+static int     uuid_f(int argc, char **argv);
+static void     uuid_help(void);
+static int     label_f(int argc, char **argv);
+static void     label_help(void);
+/*
+ * Command table entries registered by uuid_init().
+ * NOTE(review): the meaning of the positional initializers depends on the
+ * cmdinfo_t layout in command.h; the three integers look like flags and
+ * min/max argument counts - confirm there.
+ */
+static const cmdinfo_t uuid_cmd =
+       { "uuid", NULL, uuid_f, 0, 1, 1, "[uuid]",
+         "write/print FS uuid", uuid_help };
+static const cmdinfo_t label_cmd =
+       { "label", NULL, label_f, 0, 1, 1, "[label]",
+         "write/print FS label", label_help };
+static int     warned; /* nonzero once do_label() has printed its truncation warning */
+/*
+ * Print the long help text for the 'uuid' command through dbprintf().
+ * Referenced from the uuid_cmd table entry.
+ */
+static void
+uuid_help(void)
+{
+       dbprintf(
+"\n"
+" write/print FS uuid\n"
+"\n"
+" Example:\n"
+"\n"
+" 'uuid'                                      - print UUID\n"
+" 'uuid 01234567-0123-0123-0123-0123456789ab' - write UUID\n"
+" 'uuid generate'                             - generate and write\n"
+" 'uuid rewrite'                              - copy UUID from SB 0\n"
+" 'uuid null'                                 - write a null uuid\n"
+"\n"
+"The print function checks the UUID in each SB and will warn if the UUIDs\n"
+"differ between AGs (the log is not checked). The write commands will\n"
+"set the uuid in all AGs to either a specified value, a newly generated\n"
+"value, the value found in the first superblock (SB 0) or a null value\n"
+"respectively. As a side effect of writing the UUID, the log is cleared\n"
+"(which is fine on a CLEANLY unmounted FS).\n"
+"\n"
+);
+}
+/*
+ * Print the long help text for the 'label' command through dbprintf().
+ * Referenced from the label_cmd table entry.
+ */
+static void
+label_help(void)
+{
+       dbprintf(
+"\n"
+" write/print FS label\n"
+"\n"
+" Example:\n"
+"\n"
+" 'label'              - print label\n"
+" 'label 123456789012' - write label\n"
+" 'label --'           - write an empty label\n"
+"\n"
+"The print function checks the label in each SB and will warn if the labels\n"
+"differ between AGs. The write commands will set the label in all AGs to the\n"
+"specified value.  The maximum length of a label is 12 characters - use of a\n"
+"longer label will result in truncation and a warning will be issued.\n"
+"\n"
+);
+}
+
+/*
+ * Read and validate the superblock of allocation group agno.
+ *
+ * A new I/O cursor is pushed and positioned on the AG's superblock, and the
+ * on-disk data is converted into *sb.
+ *
+ * Returns 1 on success; the cursor is then left pushed, iocur_top->data
+ * refers to the raw superblock, and the caller must pop_cur() when done.
+ * Returns 0 after printing a message on any failure; the cursor has then
+ * already been popped, so every failure path leaves the cursor stack as it
+ * was on entry (previously only the read failure did so).
+ */
+static int
+get_sb(xfs_agnumber_t agno, xfs_sb_t *sb)
+{
+       push_cur();
+       set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1,
+               DB_RING_IGN, NULL);
+       if (!iocur_top->data) {
+               dbprintf("can't read superblock for AG %u\n", agno);
+               goto fail;
+       }
+
+       libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+       if (sb->sb_magicnum != XFS_SB_MAGIC) {
+               dbprintf("bad sb magic # %#x in AG %u\n",
+                       sb->sb_magicnum, agno);
+               goto fail;
+       }
+       if (!XFS_SB_GOOD_VERSION(sb)) {
+               dbprintf("bad sb version # %#x in AG %u\n",
+                       sb->sb_versionnum, agno);
+               goto fail;
+       }
+       if (agno == 0 && sb->sb_inprogress != 0) {
+               dbprintf("mkfs not completed successfully\n");
+               goto fail;
+       }
+       return 1;
+
+fail:
+       /* callers return immediately on failure, so undo the push here */
+       pop_cur();
+       return 0;
+}
+
+/*
+ * Fetch or store the UUID held in the superblock of AG agno.
+ *
+ * uuid == NULL: copy the superblock UUID into a static buffer, pop the
+ *               cursor pushed by get_sb() and return the buffer.
+ * uuid != NULL: copy *uuid into the superblock, write it out and return
+ *               uuid.
+ *
+ * Returns NULL if the superblock could not be read or validated.
+ */
+static uuid_t *
+do_uuid(xfs_agnumber_t agno, uuid_t *uuid)
+{
+       static uuid_t   saved;
+       xfs_sb_t        sb;
+
+       if (get_sb(agno, &sb) == 0)
+               return NULL;
+
+       if (uuid != NULL) {
+               /* store the new value, touching only the uuid field */
+               memcpy(&sb.sb_uuid, uuid, sizeof(uuid_t));
+               libxfs_xlate_sb(iocur_top->data, &sb, -1, ARCH_CONVERT,
+                               XFS_SB_UUID);
+               write_cur();
+               /*
+                * NOTE(review): unlike the fetch path below, the cursor
+                * pushed by get_sb() is not popped here - confirm whether
+                * that is intentional.
+                */
+               return uuid;
+       }
+
+       /* fetch: hand back a private copy and release the cursor */
+       memcpy(&saved, &sb.sb_uuid, sizeof(uuid_t));
+       pop_cur();
+       return &saved;
+}
+
+/*
+ * Fetch or store the label (sb_fname) in the superblock of AG agno.
+ *
+ * label == NULL: return the current label, NUL terminated, in a static
+ *                buffer; the cursor pushed by get_sb() is popped.
+ * label != NULL: store label, truncated to sizeof(sb_fname) bytes, write
+ *                the superblock and return the stored label in the static
+ *                buffer.  The special values "", '' and -- (two characters
+ *                each) request an empty label; in that case the caller's
+ *                string is overwritten with NUL bytes.
+ *
+ * The truncation warning is printed once only (see 'warned').
+ * Returns NULL if the superblock could not be read or validated.
+ */
+static char *
+do_label(xfs_agnumber_t agno, char *label)
+{
+       size_t          len;
+       xfs_sb_t        tsb;
+       static char     lbl[sizeof(tsb.sb_fname) + 1];
+
+       if (!get_sb(agno, &tsb))
+               return NULL;
+
+       memset(&lbl[0], 0, sizeof(lbl));
+
+       if (!label) {   /* get label */
+               pop_cur();
+               memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+               return &lbl[0];
+       }
+       /* set label */
+       len = strlen(label);
+       if (len > sizeof(tsb.sb_fname)) {
+               /* %d needs int arguments; passing size_t is undefined */
+               if (!warned++)
+                       dbprintf("warning: truncating label from %d to %d "
+                               "characters\n", (int)len,
+                               (int)sizeof(tsb.sb_fname));
+               len = sizeof(tsb.sb_fname);
+       }
+       if ( len == 2 &&
+            (strcmp(label, "\"\"") == 0 ||
+             strcmp(label, "''")   == 0 ||
+             strcmp(label, "--")   == 0) )
+               label[0] = label[1] = '\0';
+       memset(&tsb.sb_fname, 0, sizeof(tsb.sb_fname));
+       memcpy(&tsb.sb_fname, label, len);
+       memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+       libxfs_xlate_sb(iocur_top->data, &tsb, -1, ARCH_CONVERT, XFS_SB_FNAME);
+       write_cur();
+       return &lbl[0];
+}
+
+/*
+ * Handler for the 'uuid' command.
+ *
+ * With no argument the UUID of every AG superblock is read, a warning is
+ * printed if a copy differs from AG 0, and the UUID of AG 0 is printed.
+ *
+ * With one argument ("generate", "null", "rewrite" or a textual UUID) the
+ * log is cleared with the new UUID and the UUID is then written to every AG
+ * superblock.  Writing requires read-write expert mode and a log device
+ * specification that matches the filesystem (internal vs external log).
+ * If a superblock cannot be updated the command stops there and does not
+ * report a new uuid.
+ *
+ * Always returns 0.
+ */
+static int
+uuid_f(
+       int             argc,
+       char            **argv)
+{
+       char            bp[40];
+       xfs_agnumber_t  agno;
+       uuid_t          uu;
+       uuid_t          *uup = NULL;
+
+       if (argc != 1 && argc != 2) {
+               dbprintf("invalid parameters\n");
+               return 0;
+       }
+
+       if (argc == 2) {
+               /* write uuid */
+
+               if (flag_readonly || !flag_expert_mode) {
+                       dbprintf("%s not started in read-write expert mode, writing disabled\n",
+                               progname);
+                       return 0;
+               }
+
+               if (!strcasecmp(argv[1], "generate")) {
+                       uuid_generate(uu);
+               } else if (!strcasecmp(argv[1], "null")) {
+                       uuid_clear(uu);
+               } else if (!strcasecmp(argv[1], "rewrite")) {
+                       uup = do_uuid(0, NULL);
+                       if (!uup) {
+                               dbprintf("failed to read UUID from AG 0\n");
+                               return 0;
+                       }
+                       memcpy(&uu, uup, sizeof(uuid_t));
+                       uuid_unparse(uu, bp);
+                       dbprintf("old uuid = %s\n", bp);
+               } else if (uuid_parse(argv[1], uu)) {
+                       dbprintf("invalid uuid\n");
+                       return 0;
+               }
+
+               /* refuse to touch the log unless the right device is known */
+               if (mp->m_sb.sb_logstart) {
+                       if (xfsargs.logdev) {
+                               dbprintf("external log specified for FS with internal log - aborting \n");
+                               return 0;
+                       }
+               } else {
+                       if (!xfsargs.logdev) {
+                               dbprintf("no external log specified for FS with external log - aborting\n");
+                               return 0;
+                       }
+               }
+
+               dbprintf("clearing log and setting uuid\n");
+
+               /* clear log (setting uuid) */
+               if (libxfs_log_clear(
+                               (mp->m_sb.sb_logstart) ? xfsargs.ddev : xfsargs.logdev,
+                               XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+                               XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
+                               &uu,
+                               XLOG_FMT)) {
+                       dbprintf("error clearing log\n");
+                       return 0;
+               }
+
+               dbprintf("writing all SBs\n");
+
+               for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+                       if (!do_uuid(agno, &uu)) {
+                               /* do not claim success below */
+                               dbprintf("failed to set uuid in AG %u\n", agno);
+                               return 0;
+                       }
+               }
+
+               uuid_unparse(uu, bp);
+               dbprintf("new uuid = %s\n", bp);
+
+               return 0;
+       }
+
+       /* get (check) uuid */
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+               uup = do_uuid(agno, NULL);
+               if (!uup) {
+                       dbprintf("failed to read UUID from AG %u\n", agno);
+                       return 0;
+               }
+               if (agno) {
+                       if (memcmp(&uu, uup, sizeof(uuid_t))) {
+                               dbprintf("warning: uuid copies differ\n");
+                               break;
+                       }
+               } else {
+                       /* AG 0 is the reference copy */
+                       memcpy(&uu, uup, sizeof(uuid_t));
+               }
+       }
+       if (mp->m_sb.sb_logstart) {
+               if (xfsargs.logdev)
+                       dbprintf("warning: external log specified for FS with internal log\n");
+       } else {
+               if (!xfsargs.logdev)
+                       dbprintf("warning: no external log specified for FS with external log\n");
+       }
+
+       uuid_unparse(uu, bp);
+       dbprintf("uuid = %s\n", bp);
+
+       return 0;
+}
+
+/*
+ * Handler for the 'label' command.
+ *
+ * With no argument the label of every AG superblock is read, a warning is
+ * printed for each AG whose label differs from that of AG 0, and the label
+ * last read is printed.
+ *
+ * With one argument the label is written to every AG superblock; this
+ * requires read-write expert mode.  If a superblock cannot be updated the
+ * command stops there and does not report a new label.
+ *
+ * Always returns 0.
+ */
+static int
+label_f(
+       int             argc,
+       char            **argv)
+{
+       char            *p = NULL;
+       xfs_sb_t        sb;
+       xfs_agnumber_t  ag;
+
+       if (argc != 1 && argc != 2) {
+               dbprintf("invalid parameters\n");
+               return 0;
+       }
+
+       if (argc == 2) {        /* write label */
+               if (flag_readonly || !flag_expert_mode) {
+                       dbprintf("%s not started in read-write expert mode, "
+                               "writing disabled\n", progname);
+                       return 0;
+               }
+
+               dbprintf("writing all SBs\n");
+               for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+                       p = do_label(ag, argv[1]);
+                       if (p == NULL) {
+                               /* p must not reach the %s below */
+                               dbprintf("failed to set label in AG %u\n", ag);
+                               return 0;
+                       }
+               }
+               if (p != NULL)
+                       dbprintf("new label = \"%s\"\n", p);
+       } else {                /* print label */
+               for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+                       p = do_label(ag, NULL);
+                       if (!p) {
+                               dbprintf("failed to read label in AG %u\n", ag);
+                               return 0;
+                       }
+                       if (!ag)
+                               memcpy(&sb.sb_fname, p, sizeof(sb.sb_fname));
+                       else if (memcmp(&sb.sb_fname, p, sizeof(sb.sb_fname)))
+                               dbprintf("warning: label in AG %u differs\n", ag);
+               }
+               if (p != NULL)
+                       dbprintf("label = \"%s\"\n", p);
+       }
+       return 0;
+}
+/*
+ * Reset the label-truncation warning flag and register the 'label' and
+ * 'uuid' commands.
+ */
+void
+uuid_init(void)
+{
+       warned = 0;
+       add_command(&label_cmd);
+       add_command(&uuid_cmd);
+}
diff --git a/db/uuid.h b/db/uuid.h
new file mode 100644 (file)
index 0000000..8997237
--- /dev/null
+++ b/db/uuid.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+/* Registers the 'label' and 'uuid' commands (defined in uuid.c). */
+extern void    uuid_init(void);
diff --git a/db/write.c b/db/write.c
new file mode 100644 (file)
index 0000000..32477f2
--- /dev/null
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <ctype.h>
+#include <time.h>
+#include "bit.h"
+#include "block.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "io.h"
+#include "output.h"
+#include "print.h"
+#include "write.h"
+#include "malloc.h"
+/* Handler and help routine for the 'write' command, defined below. */
+static int     write_f(int argc, char **argv);
+static void     write_help(void);
+/*
+ * Command table entry for 'write'; write_init() registers it only when
+ * flag_expert_mode is set.
+ */
+static const cmdinfo_t write_cmd =
+       { "write", NULL, write_f, 0, -1, 0, "[field or value]...",
+         "write value to disk", write_help };
+
+/*
+ * Register the 'write' command and seed the generator used by
+ * 'write random'.  Does nothing unless expert mode is enabled.
+ */
+void
+write_init(void)
+{
+       if (flag_expert_mode) {
+               add_command(&write_cmd);
+               srand48(clock());
+       }
+}
+
+/*
+ * Print the long help text for the 'write' command through dbprintf().
+ * (The string mode example previously lacked its closing quote.)
+ */
+static void
+write_help(void)
+{
+       dbprintf(
+"\n"
+" The 'write' command takes on different personalities depending on the\n"
+" type of object being worked with.\n\n"
+" Write has 3 modes:\n"
+"  'struct mode' - is active anytime you're looking at a filesystem object\n"
+"                  which contains individual fields (ex: an inode).\n"
+"  'data mode'   - is active anytime you set a disk address directly or set\n"
+"                  the type to 'data'.\n"
+"  'string mode' - only used for writing symlink blocks.\n"
+"\n"
+" Examples:\n"
+"  Struct mode: 'write core.uid 23'          - set an inode uid field to 23.\n"
+"               'write fname \"hello\\000\"'    - write superblock fname.\n"
+"               (note: in struct mode strings are not null terminated)\n"
+"               'write fname #6669736800'    - write superblock fname with hex.\n"
+"               'write uuid 00112233-4455-6677-8899-aabbccddeeff'\n"
+"                                            - write superblock uuid.\n"
+"  Data mode:   'write fill 0xff' - fill the entire block with 0xff's\n"
+"               'write lshift 3' - shift the block 3 bytes to the left\n"
+"               'write sequence 1 5' - write a cycle of number [1-5] through\n"
+"                                      the entire block.\n"
+"  String mode: 'write \"This_is_a_filename\"' - write null terminated string.\n"
+"\n"
+" In data mode type 'write' by itself for a list of specific commands.\n\n"
+);
+}
+
+/*
+ * Handler for the 'write' command: hand the remaining arguments to the
+ * handler of the current type with action DB_WRITE.  Prints a message and
+ * does nothing when started read-only, when there is no current type, or
+ * when the current type has no handler.  Always returns 0.
+ */
+static int
+write_f(
+       int             argc,
+       char            **argv)
+{
+       extern char     *progname;
+       pfunc_t         handler;
+
+       if (flag_readonly)
+               dbprintf("%s started in read only mode, writing disabled\n",
+                       progname);
+       else if (!cur_typ)
+               dbprintf("no current type\n");
+       else if ((handler = cur_typ->pfunc) == NULL)
+               dbprintf("no handler function for type %s, write unsupported.\n",
+                        cur_typ->name);
+       else
+               /* argv[0] is the "write" command itself; skip it */
+               handler(DB_WRITE, cur_typ->fields, argc - 1, argv + 1);
+
+       return 0;
+}
+
+/*
+ * Compare a possibly abbreviated command word against a command name.
+ *
+ * Returns 1 when the strings are identical, or when one string is a prefix
+ * of the other, sig is nonzero and at least sig characters matched.
+ * Returns 0 otherwise, including when either pointer is NULL.
+ */
+static int
+sigcmp(
+       char  *s1,
+       char  *s2,
+       int   sig)
+{
+       int matched = 0;
+
+       if (s1 == NULL || s2 == NULL)
+               return 0;
+
+       /* count the common prefix; reaching the terminator means equal */
+       while (s1[matched] == s2[matched]) {
+               if (s1[matched] == '\0')
+                       return 1;
+               matched++;
+       }
+
+       /* both strings continue past the mismatch: a genuine difference */
+       if (s1[matched] != '\0' && s2[matched] != '\0')
+               return 0;
+
+       return (sig != 0 && matched >= sig) ? 1 : 0;
+}
+
+/*
+ * Shift len bytes of the current buffer, starting at offset start, left by
+ * shift bytes and zero the bytes vacated at the end of the region.
+ * A value of -1 selects the default: shift 1, start 0, len "up to the end
+ * of the buffer".  from and to are unused.
+ * Nothing is modified if the region or the shift count is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_lshift(
+       int   start,
+       int   len,
+       int   shift,
+       int   from,
+       int   to)
+{
+       char *base;
+
+       if (shift == -1)
+               shift = 1;
+       if (start == -1)
+               start = 0;
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+       if (shift < 0 || shift > len) {
+               dbprintf("shift count (%d) out of range for length (%d)\n",
+                        shift, len);
+               return;
+       }
+
+       base = (char *)iocur_top->data + start;
+
+       /* source and destination overlap, so memcpy() must not be used */
+       memmove(base, base+shift, len-shift);
+       memset(base+(len-shift), 0, shift);
+}
+
+/*
+ * Shift len bytes of the current buffer, starting at offset start, right by
+ * shift bytes and zero the bytes vacated at the start of the region.
+ * A value of -1 selects the default: shift 1, start 0, len "up to the end
+ * of the buffer".  from and to are unused.
+ * Nothing is modified if the region or the shift count is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_rshift(
+       int   start,
+       int   len,
+       int   shift,
+       int   from,
+       int   to)
+{
+       char *base;
+
+       if (shift == -1)
+               shift = 1;
+       if (start == -1)
+               start = 0;
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+       if (shift < 0 || shift > len) {
+               dbprintf("shift count (%d) out of range for length (%d)\n",
+                        shift, len);
+               return;
+       }
+
+       base = (char *)iocur_top->data + start;
+
+       /* source and destination overlap, so memcpy() must not be used */
+       memmove(base+shift, base, len-shift);
+       memset(base, 0, shift);
+}
+
+/*
+ * Rotate len bytes of the current buffer, starting at offset start, left by
+ * shift bytes: the first shift bytes of the region move to its end.
+ * A value of -1 selects the default: shift 1, start 0, len "up to the end
+ * of the buffer".  from and to are unused.
+ * Nothing is modified if the region or the shift count is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_lrot(
+       int   start,
+       int   len,
+       int   shift,
+       int   from,
+       int   to)
+{
+       char *base;
+       char *hold_region;
+
+       if (shift == -1)
+               shift = 1;
+       if (start == -1)
+               start = 0;
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+       if (shift < 0 || shift > len) {
+               dbprintf("shift count (%d) out of range for length (%d)\n",
+                        shift, len);
+               return;
+       }
+       if (shift == 0)
+               return;         /* nothing to rotate */
+
+       base = (char *)iocur_top->data + start;
+
+       /* save the bytes that wrap around, slide the rest, put them back */
+       hold_region = xmalloc(shift);
+       memcpy(hold_region, base, shift);
+       memmove(base, base+shift, len-shift);   /* regions overlap */
+       memcpy(base+(len-shift), hold_region, shift);
+       free(hold_region);      /* was leaked on every call */
+}
+
+/*
+ * Rotate len bytes of the current buffer, starting at offset start, right
+ * by shift bytes: the last shift bytes of the region move to its start.
+ * A value of -1 selects the default: shift 1, start 0, len "up to the end
+ * of the buffer".  from and to are unused.
+ * Nothing is modified if the region or the shift count is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_rrot(
+       int   start,
+       int   len,
+       int   shift,
+       int   from,
+       int   to)
+{
+       char *base;
+       char *hold_region;
+
+       if (shift == -1)
+               shift = 1;
+       if (start == -1)
+               start = 0;
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+       if (shift < 0 || shift > len) {
+               dbprintf("shift count (%d) out of range for length (%d)\n",
+                        shift, len);
+               return;
+       }
+       if (shift == 0)
+               return;         /* nothing to rotate */
+
+       base = (char *)iocur_top->data + start;
+
+       /* save the bytes that wrap around, slide the rest, put them back */
+       hold_region = xmalloc(shift);
+       memcpy(hold_region, base+(len-shift), shift);
+       memmove(base+shift, base, len-shift);   /* regions overlap */
+       memcpy(base, hold_region, shift);
+       free(hold_region);      /* was leaked on every call */
+}
+
+/*
+ * Fill len bytes of the current buffer, starting at offset start, with a
+ * repeating sequence of byte values running from 'from' to 'to' in
+ * increments of step.  When from > to the sequence counts downwards.
+ * Defaults (argument -1): start 0, len "up to the end of the buffer",
+ * from 0, to 255, step 1.  from/to values outside 0..255 also fall back to
+ * their defaults.
+ * Nothing is modified if the region is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_seq(
+       int   start,
+       int   len,
+       int   step,
+       int   from,
+       int   to)
+{
+       int i;
+       int tmp;
+       int base;
+       int range;
+       int top;
+       char *buf;
+
+       if (start == -1)
+               start = 0;
+
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+
+       if (from < 0 || from > 255)
+               from = 0;
+       if (to < 0 || to > 255)
+               to = 255;
+       if (step == -1)
+               step = 1;
+
+       base = from;
+       top = to;
+       if (from > to) {
+               base = to;
+               top = from;
+               if (step > 0)
+                       step = -step;
+       }
+
+       range = top - base;
+
+       /* honour the start offset (it used to be ignored) */
+       buf = (char *)iocur_top->data + start;
+
+       /* begin at 'from': offset 0 when ascending, 'range' when descending */
+       tmp = from - base;
+       for (i = 0; i < len; i++) {
+               *buf++ = tmp + base;
+               tmp = (tmp + step)%(range+1);
+               /* C's % keeps the sign of the dividend; wrap back into range */
+               if (tmp < 0)
+                       tmp += range+1;
+       }
+}
+
+/*
+ * Fill len bytes of the current buffer, starting at offset start, with
+ * bytes taken from lrand48().
+ * Defaults (argument -1): start 0, len "up to the end of the buffer".
+ * shift, from and to are unused.
+ * Nothing is modified if the region is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_random(
+       int   start,
+       int   len,
+       int   shift,
+       int   from,
+       int   to)
+{
+       int i;
+       char *buf;
+
+       if (start == -1)
+               start = 0;
+
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;
+       }
+
+       /* honour the start offset (it used to be ignored) */
+       buf = (char *)iocur_top->data + start;
+
+       for (i = 0; i < len; i++)
+               *buf++ = (char)lrand48();
+}
+
+/*
+ * Set len bytes of the current buffer, starting at offset start, to value.
+ * Defaults (argument -1): value 0, start 0, len "up to the end of the
+ * buffer".  from and to are unused.
+ * Nothing is modified if the region is out of range.
+ */
+/* ARGSUSED */
+static void
+bwrite_fill(
+       int   start,
+       int   len,
+       int   value,
+       int   from,
+       int   to)
+{
+       char *base;
+
+       if (value == -1)
+               value = 0;
+       if (start == -1)
+               start = 0;
+       if (len == -1)
+               len = iocur_top->len - start;
+
+       if (start < 0 || start > iocur_top->len ||
+           len < 0 || len > iocur_top->len - start) {
+               dbprintf("length (%d) too large for data block size (%d)\n",
+                        len, iocur_top->len);
+               return;         /* memset() below would overrun the buffer */
+       }
+
+       base = (char *)iocur_top->data + start;
+
+       memset(base, value, len);
+}
+/*
+ * Table of the data-mode ('write' on a raw block) subcommands, searched by
+ * write_block().  The *_arg members are argv indexes of the corresponding
+ * argument; 0 means the subcommand does not take that argument.
+ */
+static struct bw_cmd {
+       void    (*cmdfunc)(int,int,int,int,int);        /* called as (start, len, shiftcount, from, to) */
+       char    *cmdstr;                /* full subcommand name */
+       int     sig_chars;              /* significant characters, passed to sigcmp() */
+       int     argmin;                 /* minimum number of arguments after the name */
+       int     argmax;                 /* maximum number of arguments after the name */
+       int     shiftcount_arg;
+       int     from_arg;
+       int     to_arg;
+       int     start_arg;
+       int     len_arg;
+       char    *usage;                 /* argument summary printed in the usage message */
+} bw_cmdtab[] = {
+       /* cmdfunc, cmdstr, sig, min, max, shiftcount, from, to, start, len */
+       { bwrite_lshift, "lshift",   2, 0, 3, 1, 0, 0, 2, 3,
+               "[shiftcount] [start] [len]", },
+       { bwrite_rshift, "rshift",   2, 0, 3, 1, 0, 0, 2, 3,
+               "[shiftcount] [start] [len]", },
+       { bwrite_lrot,   "lrot",     2, 0, 3, 1, 0, 0, 2, 3,
+               "[shiftcount] [start] [len]", },
+       { bwrite_rrot,   "rrot",     2, 0, 3, 1, 0, 0, 2, 3,
+               "[shiftcount] [start] [len]", },
+       { bwrite_seq,    "sequence", 3, 0, 4, 0, 1, 2, 3, 4,
+               "[from] [to] [start] [len]", },
+       { bwrite_random, "random",   3, 0, 2, 0, 0, 0, 1, 2,
+               "[start] [len]", },
+       { bwrite_fill,   "fill",     1, 1, 3, 1, 0, 0, 2, 3,
+               "num [start] [len]" }
+};
+/* Number of entries in bw_cmdtab[]. */
+#define BWRITE_CMD_MAX (sizeof(bw_cmdtab)/sizeof(bw_cmdtab[0]))
+
+/*
+ * Convert up to three leading octal digits of arg into a byte value.
+ *
+ * arg points just past the backslash of an escape such as "\012".
+ * Conversion stops at the end of the string, at the first character that is
+ * not an octal digit, or after three digits.  The value, masked to 8 bits,
+ * is stored in *ret (0 if no digit was found).
+ *
+ * Returns the number of characters consumed (0 to 3).
+ */
+static int
+convert_oct(
+       char *arg,
+       int  *ret)
+{
+       int count;
+       int i;
+       int val = 0;
+
+       /* only allow 1 case, '\' and 3 octal digits (or less) */
+
+       for (count = 0; count < 3; count++) {
+               if (arg[count] == '\0')
+                       break;
+
+               /* was '&&', which is never true and accepted any character */
+               if ((arg[count] < '0') || (arg[count] > '7'))
+                       break;
+       }
+
+       /* the last digit consumed is the least significant */
+       for (i = 0; i < count; i++) {
+               val |= ((arg[(count-1)-i]-'0')&0x07)<<(i*3);
+       }
+
+       *ret = val&0xff;
+
+       return(count);
+}
+
+/* Value (0-15) of the hex digit x; x must satisfy isxdigit(). */
+#define NYBBLE(x) (isdigit((unsigned char)(x))?((x)-'0'):(tolower((unsigned char)(x))-'a'+0xa))
+
+/*
+ * Convert a command line value into raw bytes for a field of bit_length
+ * bits.  Three input forms are recognised:
+ *
+ *   "text"    a quoted string; \ooo octal escapes are decoded and a closing
+ *             quote, if present, is dropped.  The result is not guaranteed
+ *             to be NUL terminated when it fills the whole field.
+ *   #hex...   (or any value containing '-') a run of hex digit pairs, with
+ *             hyphens ignored, e.g. a UUID.
+ *   number    anything else, parsed with strtoll() base 0.
+ *
+ * Returns a pointer into a static buffer that is reused by the next call,
+ * or NULL if a hex value is malformed or longer than the field.  arg may be
+ * modified (the closing quote of a string is overwritten).
+ */
+static char *
+convert_arg(
+       char *arg,
+       int  bit_length)
+{
+       int i;
+       static char *buf = NULL;
+       char *rbuf;
+       long long *value;
+       int alloc_size;
+       char *ostr;
+       int octval, ret;
+
+       /* always leave room for a 64-bit integer */
+       if (bit_length <= 64)
+               alloc_size = 8;
+       else
+               alloc_size = (bit_length+7)/8;
+
+       buf = xrealloc(buf, alloc_size);
+       memset(buf, 0, alloc_size);
+       value = (long long *)buf;
+       rbuf = buf;
+
+       if (*arg == '\"') {
+               /* handle strings */
+
+               /* zap closing quote if there is one */
+               if ((ostr = strrchr(arg+1, '\"')) != NULL)
+                       *ostr = '\0';
+
+               ostr = arg+1;
+               for (i = 0; i < alloc_size; i++) {
+                       if (!*ostr)
+                               break;
+
+                       /*
+                        * do octal: only a backslash followed by an octal
+                        * digit is an escape (the test used '||', which is
+                        * always true)
+                        */
+                       if (*ostr == '\\' &&
+                           *(ostr+1) >= '0' && *(ostr+1) <= '7') {
+                               ret = convert_oct(ostr+1, &octval);
+                               *rbuf++ = octval;
+                               ostr += ret+1;
+                               continue;
+                       }
+                       *rbuf++ = *ostr++;
+               }
+
+               return buf;
+       } else if (arg[0] == '#' || strchr(arg,'-')) {
+               /*
+                * handle hex blocks ie
+                *    #00112233445566778899aabbccddeeff
+                * and uuids ie
+                *    1122334455667788-99aa-bbcc-ddee-ff00112233445566778899
+                */
+               int bytes=bit_length/8;
+
+               /* skip leading hash */
+               if (*arg=='#') arg++;
+
+               while (*arg && bytes--) {
+                       /* skip hyphens */
+                       while (*arg=='-') arg++;
+
+                       /* get first nybble */
+                       if (!isxdigit((unsigned char)*arg)) return NULL;
+                       *rbuf=NYBBLE(*arg)<<4;
+                       arg++;
+
+                       /* skip more hyphens */
+                       while (*arg=='-') arg++;
+
+                       /* get second nybble */
+                       if (!isxdigit((unsigned char)*arg)) return NULL;
+                       *rbuf++|=NYBBLE(*arg);
+                       arg++;
+               }
+               /* input left over after the field is full: value too long */
+               if (bytes<0&&*arg) return NULL;
+               return buf;
+       } else {
+               /*
+                * handle integers
+                */
+               *value = strtoll(arg, NULL, 0);
+
+#if __BYTE_ORDER == BIG_ENDIAN
+               /* hackery for big endian */
+               if (bit_length <= 8) {
+                       rbuf += 7;
+               } else if (bit_length <= 16) {
+                       rbuf += 6;
+               } else if (bit_length <= 32) {
+                       rbuf += 4;
+               }
+#endif
+               return rbuf;
+       }
+}
+
+
+/*
+ * Struct mode write: "write fieldname value".
+ *
+ * argv[0] names a field of the current object and argv[1] is the new value
+ * (see convert_arg() for the accepted forms).  The field is located in the
+ * current buffer, overwritten, the buffer is written back and the field is
+ * printed.  Errors are reported with dbprintf() and leave the buffer
+ * untouched.
+ */
+/* ARGSUSED */
+void
+write_struct(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       const ftattr_t  *fa;
+       flist_t         *fl;
+       flist_t         *sfl;
+       int             bit_length;
+       char            *buf;
+       int             parentoffset;
+
+       if (argc != 2) {
+               dbprintf("usage: write fieldname value\n");
+               return;
+       }
+
+       fl = flist_scan(argv[0]);
+       if (!fl) {
+               dbprintf("unable to parse '%s'.\n", argv[0]);
+               return;
+       }
+
+       /* if we're a root field type, go down 1 layer to get field list */
+       if (fields->name[0] == '\0') {
+               fa = &ftattrtab[fields->ftyp];
+               ASSERT(fa->ftyp == fields->ftyp);
+               fields = fa->subfld;
+       }
+
+       /* run down the field list and set offsets into the data */
+       if (!flist_parse(fields, fl, iocur_top->data, 0)) {
+               flist_free(fl);
+               dbprintf("parsing error\n");
+               return;
+       }
+
+       /* walk to the innermost field, remembering its parent's offset */
+       sfl = fl;
+       parentoffset = 0;
+       while (sfl->child) {
+               parentoffset = sfl->offset;
+               sfl = sfl->child;
+       }
+
+       bit_length = fsize(sfl->fld, iocur_top->data, parentoffset, 0);
+       bit_length *= fcount(sfl->fld, iocur_top->data, parentoffset);
+
+       /* convert this to a generic conversion routine */
+       /* should be able to handle str, num, or even labels */
+
+       buf = convert_arg(argv[1], bit_length);
+       if (!buf) {
+               dbprintf("unable to convert value '%s'.\n", argv[1]);
+               flist_free(fl);         /* was leaked on this path */
+               return;
+       }
+
+       setbitval(iocur_top->data, sfl->offset, bit_length, buf);
+       write_cur();
+
+       flist_print(fl);
+       print_flist(fl);
+       flist_free(fl);
+}
+
+/*
+ * String mode write: "write "string..."".
+ *
+ * Converts argv[0] with convert_arg() and copies the result, up to and
+ * including its terminating NUL, to the start of the current buffer (never
+ * more than the buffer length), then writes the buffer back.
+ * A value that cannot be converted is reported and nothing is written.
+ */
+/* ARGSUSED */
+void
+write_string(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       char *buf;
+       int i;
+
+       if (argc != 1) {
+               dbprintf("usage (in string mode): write \"string...\"\n");
+               return;
+       }
+
+       buf = convert_arg(argv[0], (int)((strlen(argv[0])+1)*8));
+       if (!buf) {
+               /* convert_arg() rejects malformed hex values with NULL */
+               dbprintf("unable to convert value '%s'.\n", argv[0]);
+               return;
+       }
+       for (i = 0; i < iocur_top->len; i++) {
+               ((char *)iocur_top->data)[i] = *buf;
+               if (*buf++ == '\0')
+                       break;
+       }
+
+       /* write back to disk */
+       write_cur();
+}
+
+/*
+ * Data mode write: "write subcommand [args...]".
+ *
+ * Looks argv[0] up in bw_cmdtab[] (abbreviations are accepted, see
+ * sigcmp()), parses the numeric arguments the subcommand takes, runs it on
+ * the current buffer and writes the buffer back.  Arguments that are not
+ * given are passed to the subcommand as -1, which selects its default.
+ * Prints the usage summary when the subcommand or the number of arguments
+ * is not valid.
+ */
+/* ARGSUSED */
+void
+write_block(
+       const field_t   *fields,
+       int             argc,
+       char            **argv)
+{
+       int i;
+       int shiftcount = -1;
+       int start = -1;
+       int len = -1;
+       int from = -1;
+       int to = -1;
+       struct bw_cmd *cmd = NULL;
+
+       /*
+        * NOTE(review): argc == 1 (a subcommand with no arguments) is
+        * rejected here even for table entries whose argmin is 0 - confirm
+        * whether that is intended.
+        */
+       if (argc <= 1 || argc > 5)
+               goto block_usage;
+
+       for (i = 0; i < BWRITE_CMD_MAX; i++) {
+               if (sigcmp(argv[0], bw_cmdtab[i].cmdstr,
+                          bw_cmdtab[i].sig_chars)) {
+                       cmd = &bw_cmdtab[i];
+                       break;
+               }
+       }
+
+       if (!cmd) {
+               dbprintf("write: invalid subcommand\n");
+               goto block_usage;
+       }
+
+       if ((argc < cmd->argmin + 1) || (argc > cmd->argmax + 1)) {
+               dbprintf("write %s: invalid number of arguments\n",
+                        cmd->cmdstr);
+               goto block_usage;
+       }
+
+       /* an index of 0 means the subcommand does not take that argument */
+       if (cmd->shiftcount_arg && (cmd->shiftcount_arg < argc))
+               shiftcount = (int)strtoul(argv[cmd->shiftcount_arg], NULL, 0);
+       if (cmd->start_arg && (cmd->start_arg < argc))
+               start =  (int)strtoul(argv[cmd->start_arg], NULL, 0);
+       if (cmd->len_arg && (cmd->len_arg < argc))
+               len = (int)strtoul(argv[cmd->len_arg], NULL, 0);
+       /* from/to were guarded by len_arg, so "sequence 1 5" ignored both */
+       if (cmd->from_arg && (cmd->from_arg < argc))
+               from = (int)strtoul(argv[cmd->from_arg], NULL, 0);
+       if (cmd->to_arg && (cmd->to_arg < argc))
+               to = (int)strtoul(argv[cmd->to_arg], NULL, 0);
+
+       cmd->cmdfunc(start, len, shiftcount, from, to);
+
+       /* write back to disk */
+       write_cur();
+       return;
+
+  block_usage:
+
+       dbprintf("usage: write (in data mode)\n");
+       for (i = 0; i < BWRITE_CMD_MAX; i++) {
+               dbprintf("              %-9.9s %s\n",
+                        bw_cmdtab[i].cmdstr, bw_cmdtab[i].usage);
+       }
+       dbprintf("\n");
+       return;
+}
diff --git a/db/write.h b/db/write.h
new file mode 100644 (file)
index 0000000..7e0596f
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+/*
+ * Entry points of the "write" command.
+ *
+ * The field table is spelled "const struct field *" so that the forward
+ * declaration above is sufficient and this header does not depend on the
+ * field_t typedef having been seen first.
+ * NOTE(review): assumes field_t is a typedef for struct field - confirm
+ * against field.h.
+ */
+extern void    write_init(void);
+extern void    write_block(const struct field *fields, int argc, char **argv);
+extern void    write_string(const struct field *fields, int argc, char **argv);
+extern void    write_struct(const struct field *fields, int argc, char **argv);
diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh
new file mode 100755 (executable)
index 0000000..c516ae9
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+OPTS=""
+USAGE="Usage: xfs_admin [-flu] [-L label] [-U uuid] special"
+
+# Translate each flag into the matching xfs_db option.  OPTS is re-parsed by
+# eval below, so the label/uuid arguments are wrapped in single quotes to keep
+# every "-c" command a single word.
+# NOTE: an argument that itself contains a single quote still breaks this
+# quoting.
+while getopts "fluL:U:" c
+do
+       case $c in
+       f)      OPTS="$OPTS -f";;
+       l)      OPTS="$OPTS -c label";;
+       L)      OPTS="$OPTS -c 'label $OPTARG'";;
+       u)      OPTS="$OPTS -c uuid";;
+       U)      OPTS="$OPTS -c 'uuid $OPTARG'";;
+       \?)     echo "$USAGE" 1>&2
+               exit 2
+               ;;
+       esac
+done
+# Drop the parsed options.  The dummy word "extra" makes "shift $OPTIND"
+# discard exactly the OPTIND-1 option words; "$@" is quoted so that a device
+# path containing whitespace remains one argument.
+set -- extra "$@"
+shift $OPTIND
+case $# in
+       1)      # "$OPTS" is quoted so eval receives it verbatim (whitespace
+               # inside a label is preserved); '"$1"' is expanded by eval
+               # itself, inside double quotes.
+               eval xfs_db -x -p xfs_admin "$OPTS" '"$1"'
+               status=$?
+               ;;
+       *)      echo "$USAGE" 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/db/xfs_check.sh b/db/xfs_check.sh
new file mode 100755 (executable)
index 0000000..bff2ecc
--- /dev/null
@@ -0,0 +1,63 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_check [-svf] [-i ino]... [-b bno]... special"
+
+
+# Collect the arguments of the xfs_db "check" command in OPTS; -f is passed
+# to xfs_db itself through ISFILE.
+while getopts "b:fi:sv" c
+do
+       case $c in
+       s)      OPTS="${OPTS}-s ";;
+       v)      OPTS="${OPTS}-v ";;
+       i)      OPTS="${OPTS}-i $OPTARG ";;
+       b)      OPTS="${OPTS}-b $OPTARG ";;
+       f)      ISFILE=" -f";;
+       \?)     echo "$USAGE" 1>&2
+               exit 2
+               ;;
+       esac
+done
+# Drop the parsed options.  The dummy word "extra" makes "shift $OPTIND"
+# discard exactly the OPTIND-1 option words; "$@" and "$1" are quoted so that
+# a device path containing whitespace remains one argument.
+set -- extra "$@"
+shift $OPTIND
+case $# in
+       1)      # $ISFILE is deliberately unquoted: it is either blank or " -f".
+               xfs_db$ISFILE -i -p xfs_check -c "check$OPTS" "$1"
+               status=$?
+               ;;
+       *)      echo "$USAGE" 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/db/xfs_check64.sh b/db/xfs_check64.sh
new file mode 100755 (executable)
index 0000000..9309390
--- /dev/null
@@ -0,0 +1,63 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_check64 [-svf] [-i ino]... [-b bno]... special"
+
+
+# Collect the arguments of the xfs_db64 "check" command in OPTS; -f is passed
+# to xfs_db64 itself through ISFILE.
+while getopts "b:fi:sv" c
+do
+       case $c in
+       s)      OPTS="${OPTS}-s ";;
+       v)      OPTS="${OPTS}-v ";;
+       i)      OPTS="${OPTS}-i $OPTARG ";;
+       b)      OPTS="${OPTS}-b $OPTARG ";;
+       f)      ISFILE=" -f";;
+       \?)     echo "$USAGE" 1>&2
+               exit 2
+               ;;
+       esac
+done
+# Drop the parsed options.  The dummy word "extra" makes "shift $OPTIND"
+# discard exactly the OPTIND-1 option words; "$@" and "$1" are quoted so that
+# a device path containing whitespace remains one argument.
+set -- extra "$@"
+shift $OPTIND
+case $# in
+       1)      # $ISFILE is deliberately unquoted: it is either blank or " -f".
+               xfs_db64$ISFILE -i -p xfs_check64 -c "check$OPTS" "$1"
+               status=$?
+               ;;
+       *)      echo "$USAGE" 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/db/xfs_ncheck.sh b/db/xfs_ncheck.sh
new file mode 100755 (executable)
index 0000000..3c83e35
--- /dev/null
@@ -0,0 +1,61 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_ncheck [-sf] [-i ino]... special"
+
+
+while getopts "b:fi:sv" c
+do
+       case $c in
+       s)      OPTS=$OPTS"-s ";;
+       i)      OPTS=$OPTS"-i "$OPTARG" ";;
+       f)      ISFILE=" -f";;
+       \?)     echo $USAGE 1>&2
+               exit 2
+               ;;
+       esac
+done
+set -- extra $@
+shift $OPTIND
+case $# in
+       1)      xfs_db$ISFILE -r -p xfs_ncheck -c "blockget -ns" -c "ncheck$OPTS" $1
+               status=$?
+               ;;
+       *)      echo $USAGE 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/db/xfs_ncheck64.sh b/db/xfs_ncheck64.sh
new file mode 100755 (executable)
index 0000000..7fcd3a3
--- /dev/null
@@ -0,0 +1,61 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+#ident "$Revision: 1.1 $"
+
+OPTS=" "
+ISFILE=" "
+USAGE="usage: xfs_ncheck64 [-sf] [-i ino]... special"
+
+
+while getopts "b:fi:sv" c
+do
+       case $c in
+       s)      OPTS=$OPTS"-s ";;
+       i)      OPTS=$OPTS"-i "$OPTARG" ";;
+       f)      ISFILE=" -f";;
+       \?)     echo $USAGE 1>&2
+               exit 2
+               ;;
+       esac
+done
+set -- extra $@
+shift $OPTIND
+case $# in
+       1)      xfs_db64$ISFILE -r -p xfs_ncheck64 -c "blockget -ns" -c "ncheck$OPTS" $1
+               status=$?
+               ;;
+       *)      echo $USAGE 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644 (file)
index 0000000..abd9685
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# Makefile for the doc directory: installs the plain-text documentation.
+TOPDIR = ..
+# Shared definitions from the top-level include directory.
+include $(TOPDIR)/include/builddefs
+
+# Documentation files copied by the "install" target; the same list is
+# exported as LSRCFILES.
+DOCFILES = README.LVM README.xfsdump Porting-Guide
+LSRCFILES = $(DOCFILES)
+
+# NOTE(review): CMDTARGET is not assigned in this Makefile, so "default"
+# presumably has no prerequisites here - confirm against builddefs.
+default: $(CMDTARGET)
+
+# BUILDRULES is not assigned here; presumably provided by builddefs.
+include $(BUILDRULES)
+
+# Create the documentation directory (mode 755), then copy the files into it
+# (mode 644).
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_DOC_DIR)
+       $(INSTALL) -m 644 $(DOCFILES) $(XFS_CMDS_DOC_DIR)
diff --git a/doc/README.LVM b/doc/README.LVM
new file mode 100644 (file)
index 0000000..06eb6de
--- /dev/null
@@ -0,0 +1,77 @@
+XFS on LVM
+__________
+
+PREFACE
+
+This is a quick reference to setting XFS up on LVM. For more information
+please see the LVM HOWTO at:
+
+                http://www.linuxdoc.org/HOWTO/LVM-HOWTO.html
+
+PREREQUISITES
+
+You need a kernel with LVM support, either built in or as a module.
+This document assumes LVM is built as a module (lvm-mod).
+
+SETTING UP LVM
+
+>>> Load module
+
+      [root@crash /sbin]# modprobe lvm-mod
+
+>>> Set partition type to 0x8e for partitions you wish to use with LVM
+
+      [root@crash /sbin]# fdisk /dev/sda
+      Command (m for help): t
+      Partition number (1-4): 1
+      Hex code (type L to list codes): 8e
+      Changed system type of partition 1 to 8e (Unknown)
+
+      Command (m for help): w
+      The partition table has been altered!
+
+>>> Write PV superblock on physical volumes
+
+      [root@crash /root]# pvcreate /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1
+      pvcreate -- physical volume "/dev/sda1" successfully created
+      pvcreate -- physical volume "/dev/sdb1" successfully created
+      pvcreate -- physical volume "/dev/sdc1" successfully created
+      pvcreate -- physical volume "/dev/sdd1" successfully created
+
+>>> Create a volume group consisting of the PVs we just set up
+
+      [root@crash /root]# vgcreate vg00 /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1
+      vgcreate -- INFO: using default physical extent size 4 MB
+      vgcreate -- INFO: maximum logical volume size is 255.99 Gigabyte
+      vgcreate -- doing automatic backup of volume group "vg00"
+      vgcreate -- volume group "vg00" successfully created and activated
+
+>>> Create a logical volume - striped across 4 PVs, 64 KB chunk size, 20 GB
+
+      [root@crash /root]# lvcreate -i 4 -I 64 -L 20G -n lv00 vg00
+      lvcreate -- rounding 20971520 KB to stripe boundary size 20975616 KB / 5121 PE
+      lvcreate -- doing automatic backup of "vg00"
+      lvcreate -- logical volume "/dev/vg00/lv00" successfully created
+
+>>> Build a filesystem on the LV
+
+      [root@crash /root]# mkfs -t xfs /dev/vg00/lv00
+      meta-data=/dev/vg00/lv00         isize=256    agcount=20, agsize=262144 blks
+      data     =                       bsize=4096   blocks=5242879, imaxpct=25
+               =                       sunit=0      swidth=0 blks, unwritten=1
+      naming   =version 2              bsize=4096  
+      log      =internal log           bsize=4096   blocks=1200
+      realtime =none                   extsz=65536  blocks=0, rtextents=0
+
+      [root@crash /root]# mount -t xfs /dev/vg00/lv00 /xfs
+
+>>> Go nuts
+
+
+After a reboot you will need to reactivate the VGs/LVs:
+
+      modprobe lvm-mod
+      vgchange -a y
+
+These commands could be added to a startup script.
+
diff --git a/fsck/Makefile b/fsck/Makefile
new file mode 100644 (file)
index 0000000..965dca0
--- /dev/null
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# Makefile for the fsck directory: builds and installs fsck.xfs.
+TOPDIR = ..
+# Shared definitions from the top-level include directory.
+include $(TOPDIR)/include/builddefs
+
+# The single program built here and its only source file.
+CMDTARGET = fsck.xfs
+CFILES = xfs_fsck.c
+# NOTE(review): presumably extra compiler flags local to this directory -
+# confirm how the build rules consume LCFLAGS.
+LCFLAGS = -s -O3
+
+default: $(CMDTARGET)
+
+# BUILDRULES is not assigned here; presumably provided by builddefs.
+include $(BUILDRULES)
+
+# Create the sbin directory (mode 755), then install the program into it
+# (mode 755).
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
diff --git a/fsck/xfs_fsck.c b/fsck/xfs_fsck.c
new file mode 100644 (file)
index 0000000..edb8746
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html */
+/*   Unfortunately, we need to be a little more portable.  ;^)   */
+/* This used to be a symlink to /bin/true but that gives a weird */
+/* dependency problem in a certain package manager.              */
+
+/*
+ * Program entry point: performs no work and always reports success.
+ * Neither argument is examined.
+ */
+int
+main(int argc, char **argv)
+{
+       (void)argc;
+       (void)argv;
+
+       return 0;
+}
diff --git a/growfs/Makefile b/growfs/Makefile
new file mode 100644 (file)
index 0000000..f0bf761
--- /dev/null
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+# Makefile for the growfs directory: builds xfs_growfs and installs it
+# together with the xfs_info script.
+TOPDIR = ..
+# Shared definitions from the top-level include directory.
+include $(TOPDIR)/include/builddefs
+
+# The program built here; it is rebuilt when $(LIBXFS) changes.
+CMDTARGET = xfs_growfs
+CMDDEPS = $(LIBXFS)
+
+# Source file, the libraries linked in, and the script shipped as source.
+CFILES = xfs_growfs.c
+LLDLIBS = $(LIBXFS) $(LIBUUID)
+LSRCFILES = xfs_info.sh
+
+default: $(CMDTARGET)
+
+# BUILDRULES is not assigned here; presumably provided by builddefs.
+include $(BUILDRULES)
+
+# Create the bin directory (mode 755), install the program, and install
+# xfs_info.sh under the name xfs_info (both mode 755).
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 xfs_info.sh $(XFS_CMDS_BIN_DIR)/xfs_info
diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c
new file mode 100644 (file)
index 0000000..555f924
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libxfs.h>
+#include <mntent.h>
+#include <sys/ioctl.h>
+
+/*
+ * Names taken from the mount table entry of the filesystem being operated
+ * on; all four are assigned by explore_mtab().
+ */
+static char    *fname;         /* mount point name */
+static char    *datadev;       /* data device name */
+static char    *logdev;        /*  log device name */
+static char    *rtdev;         /*   RT device name */
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 2.
+ * Does not return.
+ *
+ * The -t entry names its argument because main() parses it as an option
+ * that takes a value (the mount table file).
+ */
+static void
+usage(void)
+{
+       fprintf(stderr,
+"Usage: %s [options] mountpoint\n\n\
+Options:\n\
+        -d          grow data/metadata section\n\
+        -l          grow log section\n\
+        -r          grow realtime section\n\
+        -n          don't change anything, just show geometry\n\
+        -i          convert log from external to internal format\n\
+        -t mtab     alternate location for mount table (/etc/mtab)\n\
+        -x          convert log from internal to external format\n\
+        -D size     grow data/metadata section to size blks\n\
+        -L size     grow/shrink log section to size blks\n\
+        -R size     grow realtime section to size blks\n\
+        -e size     set realtime extent size to size blks\n\
+        -m imaxpct  set inode max percent to imaxpct\n\
+        -V          print version information\n",
+               progname);
+       exit(2);
+}
+
+/*
+ * Print a six-line summary of the filesystem geometry to stdout, one line
+ * each for: meta-data, data, stripe settings, naming, log and realtime.
+ *
+ *   geo        - geometry to report
+ *   mntpoint   - name printed in the "meta-data=" column
+ *   unwritten  - value printed as "unwritten="
+ *   dirversion - value printed after "naming   =version"
+ *   isint      - non-zero prints the log as "internal", zero as "external"
+ */
+void
+report_info(
+       xfs_fsop_geom_t geo,
+       char            *mntpoint,
+       int             unwritten,
+       int             dirversion,
+       int             isint)
+{
+       const char      *logkind = isint ? "internal" : "external";
+       const char      *rtkind = geo.rtblocks ? "external" : "none";
+
+       printf("meta-data=%-22s isize=%-6d agcount=%d, agsize=%d blks\n",
+              mntpoint, geo.inodesize, geo.agcount, geo.agblocks);
+       printf("data     =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n",
+              "", geo.blocksize, geo.datablocks, geo.imaxpct);
+       printf("         =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n",
+              "", geo.sunit, geo.swidth, unwritten);
+       printf("naming   =version %-14d bsize=%-6d\n",
+              dirversion, geo.dirblocksize);
+       printf("log      =%-22s bsize=%-6d blocks=%d\n",
+              logkind, geo.blocksize, geo.logblocks);
+       printf("realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n",
+              rtkind, geo.rtextsize * geo.blocksize, geo.rtblocks,
+              geo.rtextents);
+}
+
+/*
+ * Locate the mount table entry for mntpoint and record its devices.
+ *
+ * Opens the mount table file mtab and walks its entries looking for the one
+ * whose mount directory has the same device and inode numbers as mntpoint.
+ * On success the file-scope variables are set:
+ *   fname   - mount directory of the matching entry
+ *   datadev - filesystem name of the matching entry
+ *   logdev  - value of the "logdev=" mount option, or NULL if absent/empty
+ *   rtdev   - value of the "rtdev=" mount option, or NULL if absent/empty
+ *
+ * Exits with status 1 if the table or the mount point cannot be accessed,
+ * if the matching entry is not of type "xfs", or if no entry matches.
+ *
+ * NOTE(review): the stored strings point into the entry returned by
+ * getmntent(), i.e. into storage owned by the C library - confirm nothing
+ * calls getmntent() again while they are still in use.
+ */
+void
+explore_mtab(char *mtab, char *mntpoint)
+{
+       struct mntent   *mnt;
+       struct stat64   statuser;
+       struct stat64   statmtab;
+       FILE            *mtp;
+
+       /* report the file actually opened, which may differ from MOUNTED */
+       if ((mtp = setmntent(mtab, "r")) == NULL) {
+               fprintf(stderr, "%s: cannot access mount list %s: %s\n",
+                       progname, mtab, strerror(errno));
+               exit(1);
+       }
+       if (stat64(mntpoint, &statuser) < 0) {
+               fprintf(stderr, "%s: cannot access mount point %s: %s\n",
+                       progname, mntpoint, strerror(errno));
+               exit(1);
+       }
+
+       while ((mnt = getmntent(mtp)) != NULL) {
+               if (stat64(mnt->mnt_dir, &statmtab) < 0) {
+                       fprintf(stderr, "%s: ignoring entry %s in %s: %s\n",
+                               progname, mnt->mnt_dir, mtab, strerror(errno));
+                       continue;
+               }
+               if (statuser.st_ino != statmtab.st_ino ||
+                               statuser.st_dev != statmtab.st_dev)
+                       continue;
+               else if (strcmp(mnt->mnt_type, "xfs") != 0) {
+                       fprintf(stderr, "%s: %s is not an XFS filesystem\n",
+                               progname, mntpoint);
+                       exit(1);
+               }
+               break;  /* we've found it */
+       }
+
+       if (mnt == NULL) {
+               fprintf(stderr,
+               "%s: %s is not a filesystem mount point, according to %s\n",
+                       progname, mntpoint, mtab);
+               exit(1);
+       }
+
+       /* find the data, log (logdev=), and realtime (rtdev=) devices */
+       fname = mnt->mnt_dir;
+       datadev = mnt->mnt_fsname;
+       if ((logdev = hasmntopt(mnt, "logdev=")) != NULL)
+               logdev += strlen("logdev=");
+       if ((rtdev = hasmntopt(mnt, "rtdev=")) != NULL)
+               rtdev += strlen("rtdev=");
+
+       /*
+        * Terminate the device names only after both lookups are done:
+        * writing a NUL into the option string earlier would hide any option
+        * that follows it.  A name ends at the next ',' or ' ' (or at the end
+        * of the string); an empty name is treated as "option not given".
+        */
+       if (logdev) {
+               logdev[strcspn(logdev, " ,")] = '\0';
+               if (*logdev == '\0')
+                       logdev = NULL;
+       }
+       if (rtdev) {
+               rtdev[strcspn(rtdev, " ,")] = '\0';
+               if (*rtdev == '\0')
+                       rtdev = NULL;
+       }
+
+       endmntent(mtp);
+}
+
+int
+main(int argc, char **argv)
+{
+       int                     aflag;  /* fake flag, do all pieces */
+       int                     c;      /* current option character */
+       long long               ddsize; /* device size in 512-byte blocks */
+       int                     dflag;  /* -d flag */
+       int                     dirversion; /* directory version number */
+       long long               dlsize; /* device size in 512-byte blocks */
+       long long               drsize; /* device size in 512-byte blocks */
+       long long               dsize;  /* new data size in fs blocks */
+       int                     error;  /* we have hit an error */
+       long                    esize;  /* new rt extent size */
+       int                     ffd;    /* mount point file descriptor */
+       xfs_fsop_geom_t         geo;    /* current fs geometry */
+       int                     iflag;  /* -i flag */
+       int                     isint;  /* log is currently internal */
+       int                     lflag;  /* -l flag */
+       long long               lsize;  /* new log size in fs blocks */
+       int                     maxpct; /* -m flag value */
+       int                     mflag;  /* -m flag */
+       char                    *mtab;  /* mount table file (/etc/mtab) */
+       int                     nflag;  /* -n flag */
+       xfs_fsop_geom_t         ngeo;   /* new fs geometry */
+       int                     rflag;  /* -r flag */
+       long long               rsize;  /* new rt size in fs blocks */
+       int                     unwritten; /* unwritten extent flag */
+       int                     xflag;  /* -x flag */
+       libxfs_init_t           xi;     /* libxfs structure */
+
+       mtab = MOUNTED;
+       progname = basename(argv[0]);
+       aflag = dflag = iflag = lflag = mflag = nflag = rflag = xflag = 0;
+       maxpct = esize = 0;
+       dsize = lsize = rsize = 0LL;
+       while ((c = getopt(argc, argv, "dD:e:ilL:m:np:rR:t:xV")) != EOF) {
+               switch (c) {
+               case 'D':
+                       dsize = atoll(optarg);
+                       /* fall through */
+               case 'd':
+                       dflag = 1;
+                       break;
+               case 'e':
+                       esize = atol(optarg);
+                       rflag = 1;
+                       break;
+               case 'i':
+                       lflag = iflag = 1;
+                       break;
+               case 'L':
+                       lsize = atoll(optarg);
+                       /* fall through */
+               case 'l':
+                       lflag = 1;
+                       break;
+               case 'm':
+                       mflag = 1;
+                       maxpct = atoi(optarg);
+                       break;
+               case 'n':
+                       nflag = 1;
+                       break;
+               case 'p':
+                       progname = optarg;
+                       break;
+               case 'R':
+                       rsize = atoll(optarg);
+                       /* fall through */
+               case 'r':
+                       rflag = 1;
+                       break;
+               case 't':
+                       mtab = optarg;
+                       break;
+               case 'x':
+                       lflag = xflag = 1;
+                       break;
+               case 'V':
+                       printf("%s version %s\n", progname, VERSION);
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       }
+       if (argc - optind != 1)
+               usage();
+       if (iflag && xflag)
+               usage();
+       if (dflag + lflag + rflag == 0)
+               aflag = 1;
+
+       explore_mtab(mtab, argv[optind]);
+
+       ffd = open(fname, O_RDONLY);
+       if (ffd < 0) {
+               perror(fname);
+               return 1;
+       }
+
+       /* get the current filesystem size & geometry */
+       if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &geo) < 0) {
+               fprintf(stderr, "%s: cannot determine geometry of filesystem"
+                       " mounted at %s: %s\n",
+                       progname, fname, strerror(errno));
+               exit(1);
+       }
+       isint = geo.logstart > 0;
+       unwritten = geo.flags & XFS_FSOP_GEOM_FLAGS_EXTFLG ? 1 : 0;
+       dirversion = geo.flags & XFS_FSOP_GEOM_FLAGS_DIRV2 ? 2 : 1;
+
+       if (nflag) {
+               report_info(geo, fname, unwritten, dirversion, isint);
+               exit(0);
+       }
+
+       /*
+        * Need root access from here on (using raw devices)...
+        */
+
+       bzero(&xi, sizeof(xi));
+       xi.dname = datadev;
+       xi.logname = logdev;
+       xi.rtname = rtdev;
+       xi.notvolok = 1;
+       xi.isreadonly = LIBXFS_ISREADONLY;
+
+       if (!libxfs_init(&xi))
+               usage();
+
+       /* check we got the info for all the sections we are trying to modify */
+       if (!xi.ddev) {
+               fprintf(stderr, "%s: failed to access data device for %s\n",
+                       progname, fname);
+               exit(1);
+       }
+       if (lflag && !isint && !xi.logdev) {
+               fprintf(stderr, "%s: failed to access external log for %s\n",
+                       progname, fname);
+               exit(1);
+       }
+       if (rflag && !xi.rtdev) {
+               fprintf(stderr, "%s: failed to access realtime device for %s\n",
+                       progname, fname);
+               exit(1);
+       }
+
+       report_info(geo, fname, unwritten, dirversion, isint);
+
+       ddsize = xi.dsize;
+       dlsize = ( xi.logBBsize? xi.logBBsize :
+                       geo.logblocks * (geo.blocksize / BBSIZE) );
+       drsize = xi.rtsize;
+
+       error = 0;
+       if (dflag | aflag) {
+               xfs_growfs_data_t       in;
+               
+               if (!mflag)
+                       maxpct = geo.imaxpct;
+               if (!dsize)
+                       dsize = ddsize / (geo.blocksize / BBSIZE);
+               else if (dsize > ddsize / (geo.blocksize / BBSIZE)) {
+                       fprintf(stderr,
+                               "data size %llu too large, maximum is %lld\n",
+                               (__u64)dsize, ddsize/(geo.blocksize/BBSIZE));
+                       error = 1;
+               }
+               if (!error && dsize < geo.datablocks) {
+                       fprintf(stderr, "data size %llu too small,"
+                               " old size is %lld\n",
+                               (__u64)dsize, geo.datablocks);
+                       error = 1;
+               } else if (!error &&
+                          dsize == geo.datablocks && maxpct == geo.imaxpct) {
+                       if (dflag)
+                               fprintf(stderr,
+                                       "data size unchanged, skipping\n");
+                       if (mflag)
+                               fprintf(stderr,
+                                       "inode max pct unchanged, skipping\n");
+               } else if (!error && !nflag) {
+                       in.newblocks = (__u64)dsize;
+                       in.imaxpct = (__u32)maxpct;
+                       if (ioctl(ffd, XFS_IOC_FSGROWFSDATA, &in) < 0) {
+                               if (errno == EWOULDBLOCK)
+                                       fprintf(stderr,
+                                "%s: growfs operation in progress already\n",
+                                               progname);
+                               else
+                                       fprintf(stderr,
+                               "%s: ioctl failed - XFS_IOC_FSGROWFSDATA: %s\n",
+                                               progname, strerror(errno));
+                               error = 1;
+                       }
+               }
+       }
+
+       if (!error && (rflag | aflag)) {
+               xfs_growfs_rt_t in;
+
+               if (!esize)
+                       esize = (__u32)geo.rtextsize;
+               if (!rsize)
+                       rsize = drsize / (geo.blocksize / BBSIZE);
+               else if (rsize > drsize / (geo.blocksize / BBSIZE)) {
+                       fprintf(stderr,
+                       "realtime size %lld too large, maximum is %lld\n",
+                               rsize, drsize / (geo.blocksize / BBSIZE));
+                       error = 1;
+               }
+               if (!error && rsize < geo.rtblocks) {
+                       fprintf(stderr,
+                       "realtime size %lld too small, old size is %lld\n",
+                               rsize, geo.rtblocks);
+                       error = 1;
+               } else if (!error && rsize == geo.rtblocks) {
+                       if (rflag)
+                               fprintf(stderr,
+                                       "realtime size unchanged, skipping\n");
+               } else if (!error && !nflag) {
+                       in.newblocks = (__u64)rsize;
+                       in.extsize = (__u32)esize;
+                       if (ioctl(ffd, XFS_IOC_FSGROWFSRT, &in) < 0) {
+                               if (errno == EWOULDBLOCK)
+                                       fprintf(stderr,
+                               "%s: growfs operation in progress already\n",
+                                               progname);
+                               else if (errno == ENOSYS)
+                                       fprintf(stderr,
+                               "%s: realtime growth not implemented\n",
+                                               progname);
+                               else
+                                       fprintf(stderr,
+                               "%s: ioctl failed - XFS_IOC_FSGROWFSRT: %s\n",
+                                               progname, strerror(errno));
+                               error = 1;
+                       }
+               }
+       }
+
+       if (!error && (lflag | aflag)) {
+               xfs_growfs_log_t        in;
+
+               if (!lsize)
+                       lsize = dlsize / (geo.blocksize / BBSIZE);
+               if (iflag)
+                       in.isint = 1;
+               else if (xflag)
+                       in.isint = 0;
+               else 
+                       in.isint = xi.logBBsize == 0;
+               if (lsize == geo.logblocks && (in.isint == isint)) {
+                       if (lflag)
+                               fprintf(stderr,
+                                       "log size unchanged, skipping\n");
+               } else if (!nflag) {
+                       in.newblocks = (__u32)lsize;
+                       if (ioctl(ffd, XFS_IOC_FSGROWFSLOG, &in) < 0) {
+                               if (errno == EWOULDBLOCK)
+                                       fprintf(stderr,
+                               "%s: growfs operation in progress already\n",
+                                               progname);
+                               else if (errno == ENOSYS)
+                                       fprintf(stderr,
+                               "%s: log growth not supported yet\n", progname);
+                               else
+                                       fprintf(stderr,
+                               "%s: ioctl failed - XFS_IOC_FSGROWFSLOG: %s\n",
+                                               progname, strerror(errno));
+                               error = 1;
+                       }
+               }
+       }
+
+       if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &ngeo) < 0) {
+               fprintf(stderr, "%s: ioctl failed - XFS_IOC_FSGEOMETRY: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+       if (geo.datablocks != ngeo.datablocks)
+               printf("data blocks changed from %lld to %lld\n",
+                       geo.datablocks, ngeo.datablocks);
+       if (geo.imaxpct != ngeo.imaxpct)
+               printf("inode max percent changed from %d to %d\n",
+                       geo.imaxpct, ngeo.imaxpct);
+       if (geo.logblocks != ngeo.logblocks)
+               printf("log blocks changed from %d to %d\n",
+                       geo.logblocks, ngeo.logblocks);
+       if ((geo.logstart == 0) != (ngeo.logstart == 0))
+               printf("log changed from %s to %s\n",
+                       geo.logstart ? "internal" : "external",
+                       ngeo.logstart ? "internal" : "external");
+       if (geo.rtblocks != ngeo.rtblocks)
+               printf("realtime blocks changed from %lld to %lld\n",
+                       geo.rtblocks, ngeo.rtblocks);
+       if (geo.rtextsize != ngeo.rtextsize)
+               printf("realtime extent size changed from %d to %d\n",
+                       geo.rtextsize, ngeo.rtextsize);
+       exit(0);
+}
diff --git a/growfs/xfs_info.sh b/growfs/xfs_info.sh
new file mode 100755 (executable)
index 0000000..2b1316f
--- /dev/null
@@ -0,0 +1,56 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+OPTS=""        # mtab file named with -t; when empty no -t is passed on to xfs_growfs
+USAGE="Usage: xfs_info [-t mtab] mountpoint"
+
+while getopts "t:" c
+do
+       case $c in
+       t)      OPTS="$OPTARG" ;;
+       *)      echo "$USAGE" 1>&2
+               exit 2
+               ;;
+       esac
+done
+set -- extra "$@"      # one pad argument, so "shift $OPTIND" drops exactly the parsed options
+shift $OPTIND
+case $# in
+       1)      xfs_growfs -p xfs_info -n ${OPTS:+-t "$OPTS"} "$1"     # quoted: blanks stay inside one argument
+               status=$?
+               ;;
+       *)      echo "$USAGE" 1>&2
+               exit 2
+               ;;
+esac
+exit $status
diff --git a/include/Makefile b/include/Makefile
new file mode 100644 (file)
index 0000000..60d0a28
--- /dev/null
@@ -0,0 +1,52 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+# Files listed for packaging: builddefs.in describes LSRCFILES as input to the "dist" manifest.
+LSRCFILES = libxfs.h acl.h arch.h attributes.h handle.h jdm.h \
+       platform_defs.h.in builddefs.in buildrules \
+       xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \
+       xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \
+       xfs_buf_item.h xfs_cred.h xfs_da_btree.h xfs_dfrag.h xfs_dinode.h \
+       xfs_dir.h xfs_dir2.h xfs_dir2_block.h xfs_dir2_data.h xfs_dir2_leaf.h \
+       xfs_dir2_node.h xfs_dir2_sf.h xfs_dir_leaf.h xfs_dir_sf.h xfs_dqblk.h \
+       xfs_dquot_item.h xfs_extfree_item.h xfs_fs.h xfs_ialloc.h \
+       xfs_ialloc_btree.h xfs_imap.h xfs_inode.h xfs_inode_item.h xfs_inum.h \
+       xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_mount.h xfs_quota.h \
+       xfs_rtalloc.h xfs_sb.h xfs_trans.h xfs_trans_space.h xfs_types.h
+# No recipe: "default" builds nothing in this directory.
+default :
+
+include $(BUILDRULES)
+# "install" has no recipe either; it only requires "default".
+install : default
diff --git a/include/arch.h b/include/arch.h
new file mode 100644 (file)
index 0000000..12ce1c5
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_ARCH_H__
+#define __XFS_SUPPORT_ARCH_H__
+
+#ifdef __KERNEL__
+/* kernel build: map __LITTLE_ENDIAN / __BIG_ENDIAN, whichever is defined, onto __BYTE_ORDER */
+#include <asm/byteorder.h>
+
+#ifdef __LITTLE_ENDIAN
+# define __BYTE_ORDER  __LITTLE_ENDIAN
+#endif
+#ifdef __BIG_ENDIAN
+# define __BYTE_ORDER  __BIG_ENDIAN
+#endif
+
+#else
+/* NOTE(review): __BYTE_ORDER is not defined in this branch; presumably an earlier include supplies it - confirm */
+#include <linux/byteorder/swab.h>
+
+#endif /* __KERNEL__ */
+
+/* ARCH_NOCONVERT tags a value that the INT_* macros below use as stored, without swapping */
+/* ARCH_CONVERT is 0 on little-endian hosts and the same as ARCH_NOCONVERT on all others */
+#define ARCH_NOCONVERT 1
+#if __BYTE_ORDER == __LITTLE_ENDIAN 
+#define ARCH_CONVERT   0
+#else
+#define ARCH_CONVERT   ARCH_NOCONVERT
+#endif
+
+/* generic swapping macros: byte-reverse A at a fixed width and cast the result back to A's own type */
+/* A is parenthesized before the width cast so that an expression argument such as a + b is swapped as a whole */
+#define INT_SWAP16(A) ((typeof(A))(__swab16((__u16)(A))))
+#define INT_SWAP32(A) ((typeof(A))(__swab32((__u32)(A))))
+#define INT_SWAP64(A) ((typeof(A))(__swab64((__u64)(A))))
+/* INT_SWAP picks the width from sizeof(type); a size other than 8, 4 or 2 yields var unchanged */
+#define INT_SWAP(type, var) \
+    ((sizeof(type) == 8) ? INT_SWAP64(var) : \
+    ((sizeof(type) == 4) ? INT_SWAP32(var) : \
+    ((sizeof(type) == 2) ? INT_SWAP16(var) : \
+    (var))))
+  
+/* copy the 4 bytes at `from` to `to` in reverse order; expands to a bare { } block, not an expression */
+#define INT_SWAP_UNALIGNED_32(from,to) \
+    { \
+        ((__u8*)(to))[0] = ((__u8*)(from))[3]; \
+        ((__u8*)(to))[1] = ((__u8*)(from))[2]; \
+        ((__u8*)(to))[2] = ((__u8*)(from))[1]; \
+        ((__u8*)(to))[3] = ((__u8*)(from))[0]; \
+    }
+/* 8-byte form: each 4-byte half of `from` is reversed into the opposite half of `to` */
+#define INT_SWAP_UNALIGNED_64(from,to) \
+    { \
+        INT_SWAP_UNALIGNED_32( ((__u8*)(from)) + 4, ((__u8*)(to))); \
+        INT_SWAP_UNALIGNED_32( ((__u8*)(from)), ((__u8*)(to)) + 4); \
+    }
+
+/*
+ * get and set integers from potentially unaligned locations, one byte at a
+ * time: _LE keeps the low byte at offset 0, _BE keeps the high byte at offset 0
+ */
+#define INT_GET_UNALIGNED_16_LE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0]      ) | (((__u8*)(pointer))[1] << 8 )))
+#define INT_GET_UNALIGNED_16_BE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
+#define INT_SET_UNALIGNED_16_LE(pointer,value) \
+    { \
+        ((__u8*)(pointer))[0] = (((value)     ) & 0xff); \
+        ((__u8*)(pointer))[1] = (((value) >> 8) & 0xff); \
+    }
+#define INT_SET_UNALIGNED_16_BE(pointer,value) \
+    { \
+        ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
+        ((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
+    }
+/* each byte is widened to __u32 before shifting: a bare __u8 promotes to int, and << 24 of a byte >= 0x80 overflows it */
+#define INT_GET_UNALIGNED_32_LE(pointer) \
+   ((__u32)(((__u32)((__u8*)(pointer))[0]      ) | ((__u32)((__u8*)(pointer))[1] << 8 ) \
+           |((__u32)((__u8*)(pointer))[2] << 16) | ((__u32)((__u8*)(pointer))[3] << 24)))
+#define INT_GET_UNALIGNED_32_BE(pointer) \
+   ((__u32)(((__u32)((__u8*)(pointer))[0] << 24) | ((__u32)((__u8*)(pointer))[1] << 16) \
+           |((__u32)((__u8*)(pointer))[2] << 8)  | ((__u32)((__u8*)(pointer))[3]      )))
+/* 64-bit reads combine two 32-bit reads taken 4 bytes apart; each half is widened to __u64 before the shift */
+#define INT_GET_UNALIGNED_64_LE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))+4)) << 32 ) \
+   |((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))  ))       ))
+#define INT_GET_UNALIGNED_64_BE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))  )) << 32  ) \
+   |((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))+4))        ))
+   
+/*
+ * now pick the right ones for our MACHINE ARCHITECTURE: the unsuffixed names
+ * expand to the _LE forms on little-endian hosts and to the _BE forms otherwise
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN 
+#define INT_GET_UNALIGNED_16(pointer)       INT_GET_UNALIGNED_16_LE(pointer)
+#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_LE(pointer,value)
+#define INT_GET_UNALIGNED_32(pointer)       INT_GET_UNALIGNED_32_LE(pointer)
+#define INT_GET_UNALIGNED_64(pointer)       INT_GET_UNALIGNED_64_LE(pointer)
+#else
+#define INT_GET_UNALIGNED_16(pointer)       INT_GET_UNALIGNED_16_BE(pointer)
+#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_BE(pointer,value)
+#define INT_GET_UNALIGNED_32(pointer)       INT_GET_UNALIGNED_32_BE(pointer)
+#define INT_GET_UNALIGNED_64(pointer)       INT_GET_UNALIGNED_64_BE(pointer)
+#endif
+
+/* define generic INT_ macros; in all of them arch == ARCH_NOCONVERT means "use the value as stored" */
+/* INT_GET yields reference, byte-swapped according to its own size unless arch is ARCH_NOCONVERT */
+#define INT_GET(reference,arch) \
+    (((arch) == ARCH_NOCONVERT) \
+        ? \
+            (reference) \
+        : \
+            INT_SWAP((reference),(reference)) \
+    )
+
+/* INT_SET: assigns valueref to reference, then swaps reference in place unless NOCONVERT; does not return a value */
+#define INT_SET(reference,arch,valueref) \
+    (void)( \
+        ((reference) = (valueref)), \
+        ( \
+           ((arch) != ARCH_NOCONVERT) ? \
+               (reference) = INT_SWAP((reference),(reference)) \
+           : 0 \
+        ) \
+    )
+
+/* INT_MOD_EXPR: applies `code` to reference, converting through INT_GET/INT_SET around it when needed; does not return a value */
+#define INT_MOD_EXPR(reference,arch,code) \
+    (void)(((arch) == ARCH_NOCONVERT) \
+        ? \
+            ((reference) code) \
+        : \
+            ( \
+                (reference) = INT_GET((reference),arch) , \
+                ((reference) code), \
+                INT_SET(reference, arch, reference) \
+            ) \
+    )
+    
+/* INT_MOD: adds delta to reference by way of INT_MOD_EXPR; does not return a value */
+#define INT_MOD(reference,arch,delta) \
+    (void)( \
+        INT_MOD_EXPR(reference,arch,+=(delta)) \
+    )
+    
+/*
+ * INT_COPY - copy a value between two locations with the
+ *            _same architecture_ but _potentially different sizes_
+ *
+ *          if the sizes of the two parameters are equal or arch is
+ *              ARCH_NOCONVERT, a simple copy is done
+ *
+ *          otherwise, architecture conversions are done
+ *
+ */
+    
+/* does not return a value */   
+#define INT_COPY(dst,src,arch) \
+    (void)( \
+        ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \
+            ? \
+                ((dst) = (src)) \
+            : \
+                INT_SET(dst, arch, INT_GET(src, arch)) \
+    )
+    
+/*
+ * INT_XLATE - copy a value in either direction between two locations 
+ *             with different architectures 
+ *
+ *                  dir < 0     - copy from memory to buffer (native to arch)
+ *                  dir > 0     - copy from buffer to memory (arch to native)
+ */
+/* NOTE(review): dir == 0 is rejected only by ASSERT; if that compiles away, the else (memory to buffer) path runs - confirm */
+/* does not return a value */   
+#define INT_XLATE(buf,mem,dir,arch) {\
+    ASSERT(dir); \
+    if (dir>0) { \
+        (mem)=INT_GET(buf, arch); \
+    } else { \
+        INT_SET(buf, arch, mem); \
+    } \
+}
+/* zero test / zero store: arch is accepted but unused, since an all-zero value reads the same in either byte order */
+#define INT_ISZERO(reference,arch) \
+    ((reference) == 0)
+    
+#define INT_ZERO(reference,arch) \
+    ((reference) = 0)
+/* ARCH_NOCONVERT selects the host-order accessor, any other arch the _BE one; the SET form is a bare if/else statement */
+#define INT_GET_UNALIGNED_16_ARCH(pointer,arch) \
+    ( ((arch) == ARCH_NOCONVERT) \
+        ? \
+            (INT_GET_UNALIGNED_16(pointer)) \
+        : \
+            (INT_GET_UNALIGNED_16_BE(pointer)) \
+    )
+#define INT_SET_UNALIGNED_16_ARCH(pointer,value,arch) \
+    if ((arch) == ARCH_NOCONVERT) { \
+        INT_SET_UNALIGNED_16(pointer,value); \
+    } else { \
+        INT_SET_UNALIGNED_16_BE(pointer,value); \
+    }
+
+#endif /* __XFS_SUPPORT_ARCH_H__ */
diff --git a/include/builddefs.in b/include/builddefs.in
new file mode 100644 (file)
index 0000000..0f10b8a
--- /dev/null
@@ -0,0 +1,173 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+# @configure_input@
+#
+
+ifndef _BUILDDEFS_INCLUDED_
+_BUILDDEFS_INCLUDED_ = 1
+# @name@ tokens are placeholders in this .in template, presumably substituted by configure
+DEBUG = @debug_build@
+OPTIMIZER = @opt_build@
+MALLOCLIB = @malloc_lib@
+# Libraries: three archives from this tree, libuuid from /usr/lib, and the LVM placeholder
+LIBXFS = $(TOPDIR)/libxfs/libxfs.a
+LIBATTR = $(TOPDIR)/libattr/libattr.a
+LIBHANDLE = $(TOPDIR)/handle/libhandle.a
+LIBUUID = /usr/lib/libuuid.a
+LIBLVM = @liblvm@
+
+BUILDRULES = $(TOPDIR)/include/buildrules
+
+# General package information and the XFS_CMDS_* installation directories
+TARGET_OS = @host_platform@
+PACKAGE_NAME = @package_name@
+PACKAGE_RELEASE = @package_release@
+PACKAGE_VERSION = @package_version@
+PACKAGE_DISTRIBUTION = @package_distribution@
+PACKAGE_BUILDER        = @package_builder@
+XFS_CMDS_SBIN_DIR = @xfs_cmds_sbin_dir@
+XFS_CMDS_BIN_DIR = @xfs_cmds_bin_dir@
+XFS_CMDS_LIB_DIR = @xfs_cmds_lib_dir@
+XFS_CMDS_SHARE_DIR = @xfs_cmds_share_dir@
+XFS_CMDS_INC_DIR = @xfs_cmds_inc_dir@
+XFS_CMDS_MAN_DIR = @xfs_cmds_man_dir@
+XFS_CMDS_TMP_DIR = @xfs_cmds_tmp_dir@
+XFS_CMDS_DOC_DIR = @xfs_cmds_doc_dir@
+
+# LCFLAGS, LLDFLAGS, LLDLIBS, LSRCFILES and LDIRT may be specified in
+# user Makefiles. Note: LSRCFILES is anything other than Makefile, $(CFILES)
+# $(CXXFILES), or $(HFILES) and is used to construct the manifest list
+# during the "dist" phase (packaging).
+# Flags for every compile; VERSION reaches the C code as a quoted string literal
+CFLAGS += $(OPTIMIZER) $(DEBUG) -funsigned-char -Wall -Wno-parentheses \
+       $(LCFLAGS) -I$(TOPDIR)/include '-DVERSION="$(PACKAGE_VERSION)"' \
+       -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+       -DXFS_BIG_FILES=1 -DXFS_BIG_FILESYSTEMS=1 -DHAVE_LIBLVM=@have_liblvm@
+
+LDFLAGS = $(LLDFLAGS)
+LDLIBS = $(LLDLIBS) $(MALLOCLIB)
+
+MAKEOPTS = --no-print-directory
+SRCFILES = Makefile $(HFILES) $(CFILES) $(LSRCFILES) $(LFILES) $(YFILES)
+DIRT = $(LDIRT) dep dep.bak $(OBJECTS) $(CMDTARGET) $(LIBTARGET) \
+       $(STATICLIBTARGET) *.[1-9].gz
+# One object per assembler, C, lex and yacc source; yacc sources map to %.tab.o
+OBJECTS = $(ASFILES:.s=.o) \
+          $(CFILES:.c=.o) \
+          $(LFILES:.l=.o) \
+          $(YFILES:%.y=%.tab.o)
+# Tool commands (placeholders again); INSTALL always passes -o root -g root to install-sh
+MAKE   = @make@
+CC     = @cc@
+LD     = @ld@
+AWK    = @awk@
+SED    = @sed@
+INSTALL        = $(TOPDIR)/install-sh -o root -g root
+ECHO   = @echo@
+LN_S   = @LN_S@
+# Convenience forms: each tool together with its flags or options
+CCF    = $(CC) $(CFLAGS)
+MAKEF  = $(MAKE) $(MAKEOPTS)
+CXXF   = $(CXX) $(CXXFLAGS)
+LDF    = $(LD) $(LDFLAGS)
+MAKEDEPEND  = @makedepend@
+
+ZIP    = @zip@
+TAR    = @tar@
+RPM    = @rpm@
+RPM_VERSION = @rpm_version@
+
+HAVE_ZIPPED_MANPAGES = @have_zipped_manpages@
+
+SHELL = /bin/sh
+IMAGES_DIR = $(TOPDIR)/all-images
+DIST_DIR = $(TOPDIR)/dist
+# Run the current target ($@) in every existing directory of SUBDIRS, stopping at the first failure
+SUBDIRS_MAKERULE = \
+       @for d in $(SUBDIRS) ""; do \
+           if test -d "$$d" -a ! -z "$$d"; then \
+               $(ECHO) === $$d ===; \
+               $(MAKEF) -C $$d $@ || exit $$?; \
+           fi; \
+       done
+# For each file matching *.[1-8] here, write the output of $(ZIP) --best -c to <file>.gz
+MAN_MAKERULE = \
+    @for f in *.[12345678] ""; do \
+       if test ! -z "$$f"; then \
+           $(ZIP) --best -c < $$f > $$f.gz; \
+       fi; \
+    done
+# Install each page of MAN_PAGES under every name on its .SH NAME line; uses $(SED) like $(AWK), not a bare sed
+INSTALL_MAN = \
+    @for d in $(MAN_PAGES); do \
+       first=true; \
+       for m in `$(AWK) '/^\.SH NAME/ {ok=1; next} ok {print; exit}' $$d \
+       | $(SED) -e 's/,/ /g' -e 's/\\-.*//' -e 's/\\\f[0-9]//g' -e 's/  / /g;q'`; \
+       do \
+           [ -z "$$m" -o "$$m" = "\\" ] && continue; \
+           t=$(MAN_DEST)/$$m.$(MAN_SECTION); \
+           if $$first; then \
+               if $(HAVE_ZIPPED_MANPAGES); then \
+                   $(ZIP) --best -c $$d > $$d.gz; _sfx=.gz; \
+               fi; \
+               u=$$m.$(MAN_SECTION)$$_sfx; \
+               echo $(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \
+               $(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \
+           else \
+               echo $(INSTALL) -S $$u $${t}$$_sfx; \
+               $(INSTALL) -S $$u $${t}$$_sfx; \
+           fi; \
+           first=false; \
+       done; \
+    done
+# Packaging is delegated to the "dist" target of the build directory
+DIST_MAKERULE = \
+       $(MAKEF) -C build dist
+# Print DIR-relative paths of this directory's SRCFILES, then recurse into SUBDIRS with DIR extended
+SOURCE_MAKERULE = \
+       @test -z "$$DIR" && DIR="."; \
+       for f in $(SRCFILES) ""; do \
+           if test ! -z "$$f"; then $(ECHO) $$DIR/$$f; fi;\
+       done; \
+       for d in `echo $(SUBDIRS)` ; do \
+           if test -d "$$d" -a ! -z "$$d"; then \
+               $(MAKEF) DIR=$$DIR/$$d -C $$d $@ || exit $$?; \
+           fi; \
+       done
+
+endif
+
+#
+# For targets that should always be rebuilt,
+# define a target that is never up-to-date.
+# Targets needing this should depend on $(_FORCE)
+_FORCE = __force_build
diff --git a/include/buildrules b/include/buildrules
new file mode 100644 (file)
index 0000000..af2a7ca
--- /dev/null
@@ -0,0 +1,76 @@
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as published
+# by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  Further, any license provided herein,
+# whether implied or otherwise, is limited to this program in accordance with
+# the express provisions of the GNU General Public License.  Patent licenses,
+# if any, provided herein do not apply to combinations of this program with
+# other product or programs, or any other product whatsoever.  This program is
+# distributed without any warranty that the program is delivered free of the
+# rightful claim of any third person by way of infringement or the like.  See
+# the GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston MA 02111-1307, USA.
+# Common build rules for gmake
+#
+ifndef _BUILDRULES_INCLUDED_
+_BUILDRULES_INCLUDED_ = 1
+
+include $(TOPDIR)/include/builddefs
+
+#
+# Standard targets (command, shared library, static archive); each rule
+# exists only when the including Makefile defines the matching variable
+ifdef CMDTARGET
+$(CMDTARGET) : $(SUBDIRS) $(OBJECTS) $(CMDDEPS)
+       $(CCF) -o $(CMDTARGET) $(LDFLAGS) $(OBJECTS) $(LDLIBS) 
+endif
+# Shared library: the soname recorded at link time is the target's own file name
+ifdef LIBTARGET
+$(LIBTARGET) : $(SUBDIRS) $(OBJECTS)
+       $(CC) $(LDFLAGS) -shared -Wl,-soname,$(LIBTARGET) -o $(LIBTARGET) \
+               $(OBJECTS) $(LDLIBS) $(LIB_FOR_DLOPEN) $(LIB_FOR_BASENAME)
+endif
+# Static archive: $? hands ar only the prerequisites that are newer than the archive
+ifdef STATICLIBTARGET
+$(STATICLIBTARGET) : $(SUBDIRS) $(OBJECTS)
+       $(AR) crf $(STATICLIBTARGET) $?
+endif
+# clean and clobber share one recipe: remove the DIRT files here, then run the same target in each subdirectory
+clean clobber : $(SUBDIRS)
+       rm -f $(DIRT)
+       $(SUBDIRS_MAKERULE)
+
+# Never blow away subdirs
+ifdef SUBDIRS
+.PRECIOUS: $(SUBDIRS)
+$(SUBDIRS):
+       $(SUBDIRS_MAKERULE)
+endif
+# source: expands SOURCE_MAKERULE, which is defined in builddefs
+source :
+       $(SOURCE_MAKERULE)
+
+endif
+# Target named by $(_FORCE): no prerequisites and no recipe
+$(_FORCE):
+
+.PHONY : depend
+# depend: run "depend" in the subdirectories, then rebuild ./dep with $(MAKEDEPEND) from CFLAGS and CFILES
+depend : $(CFILES) $(HFILES)
+       $(SUBDIRS_MAKERULE)
+       touch dep
+       $(MAKEDEPEND) -fdep -- $(CFLAGS) -- $(CFILES)
+
+# Include dep, but only if it exists
+ifeq ($(shell test -f dep && echo dep), dep)
+include dep
+endif
diff --git a/include/handle.h b/include/handle.h
new file mode 100644 (file)
index 0000000..0400a70
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __HANDLE_H__
+#define __HANDLE_H__
+/* Declarations only; the extern "C" wrapper gives them C linkage when this header is compiled as C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* NOTE(review): presumably implemented by libhandle (handle/libhandle.a); handles travel as a void * plus a size_t length - confirm */
+extern int  path_to_handle (char *__path, void **__hanp, size_t *__hlen);
+extern int  path_to_fshandle (char *__path, void **__hanp, size_t *__hlen);
+extern int  fd_to_handle (int __fd, void **__hanp, size_t *__hlen);
+extern int  handle_to_fshandle (void *__hanp, size_t __hlen, void **__fshanp,
+                               size_t *__fshlen);
+extern void free_handle (void *__hanp, size_t __hlen);
+extern int  open_by_handle (void *__hanp, size_t __hlen, int __rw);
+extern int  readlink_by_handle (void *__hanp, size_t __hlen, void *__buf,
+                               size_t __bs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HANDLE_H__ */
diff --git a/include/jdm.h b/include/jdm.h
new file mode 100644 (file)
index 0000000..3d20120
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __JDM_H__
+#define __JDM_H__
+
+typedef int    intgen_t;
+typedef void   jdm_fshandle_t;
+
+struct xfs_bstat;
+extern jdm_fshandle_t *jdm_getfshandle (char *mntpnt);
+extern intgen_t jdm_open       (jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+                                intgen_t oflags);
+extern intgen_t jdm_readlink   (jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+                                char *bufp, size_t bufsz);
+
+#ifdef EXTATTR
+
+struct attrlist_cursor;
+extern intgen_t jdm_attr_multi (jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+                                char *bufp, int rtrvcnt, int flags);
+extern intgen_t        jdm_attr_list   (jdm_fshandle_t *fsh, struct xfs_bstat *sp,
+                                char *bufp, size_t bufsz, int flags, 
+                                struct attrlist_cursor *cursor);
+#endif /* EXTATTR */
+
+/*
+ * sizeofmember(t, m): size in bytes of member m of structure type t.
+ * The null pointer is only an operand of sizeof here, so no object of
+ * type t is needed.
+ */
+#define sizeofmember( t, m )   sizeof( ( ( t * )0 )->m )
+
+/*
+ * offsetofmember(t, m): byte offset of member m within structure type t,
+ * as a size_t.  Computed by taking the address of the member through a
+ * null pointer to t, the traditional hand-written form of offsetof().
+ */
+#define offsetofmember( t, m ) ( ( size_t )( char * )&( ( ( t * )0 )->m ) )
+
+#endif /* __JDM_H__ */
diff --git a/include/libxfs.h b/include/libxfs.h
new file mode 100644 (file)
index 0000000..78e5978
--- /dev/null
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __LIBXFS_H__
+#define __LIBXFS_H__
+
+#include "platform_defs.h"
+
+#include <uuid/uuid.h>
+#include <xfs_fs.h>
+#include <xfs_types.h>
+#include <arch.h>
+#include <xfs_arch.h>
+#include <xfs_sb.h>
+#include <xfs_bit.h>
+#include <xfs_inum.h>
+#include <xfs_ag.h>
+#include <xfs_da_btree.h>
+#include <xfs_bmap_btree.h>
+#include <xfs_alloc_btree.h>
+#include <xfs_ialloc_btree.h>
+#include <xfs_alloc.h>
+#include <xfs_ialloc.h>
+#include <xfs_rtalloc.h>
+#include <xfs_btree.h>
+#include <xfs_dir.h>
+#include <xfs_dir_sf.h>
+#include <xfs_dir_leaf.h>
+#include <xfs_dir2.h>
+#include <xfs_dir2_data.h>
+#include <xfs_dir2_leaf.h>
+#include <xfs_dir2_block.h>
+#include <xfs_dir2_node.h>
+#include <xfs_dir2_sf.h>
+#include <xfs_attr_sf.h>
+#include <xfs_dinode.h>
+#include <xfs_attr_leaf.h>
+#include <xfs_quota.h>
+#include <xfs_dqblk.h>
+#include <xfs_mount.h>
+#include <xfs_trans_space.h>
+#include <xfs_inode.h>
+#include <xfs_buf_item.h>
+#include <xfs_inode_item.h>
+#include <xfs_cred.h>
+#include <xfs_bmap.h>
+#include <xfs_imap.h>
+#include <xfs_log.h>
+#include <xfs_log_priv.h>
+
+/*
+ * Argument structure for libxfs_init().
+ *
+ * The members in the "input parameters" group are filled in by the caller;
+ * the members in the "output results" group carry the results.
+ * Sizes and offsets marked (BBs) are counted in BBs, not bytes.
+ */
+typedef struct {
+                                /* input parameters */
+        char            *volname;       /* pathname of volume */
+        char            *dname;         /* pathname of data "subvolume" */
+        char            *logname;       /* pathname of log "subvolume" */
+        char            *rtname;        /* pathname of realtime "subvolume" */
+        int             isreadonly;     /* filesystem is only read in applic */
+        int             disfile;        /* data "subvolume" is a regular file */
+        int             dcreat;         /* try to create data subvolume */
+        int             lisfile;        /* log "subvolume" is a regular file */
+        int             lcreat;         /* try to create log subvolume */
+        int             risfile;        /* realtime "subvolume" is a reg file */
+        int             rcreat;         /* try to create realtime subvolume */
+        char            *notvolmsg;     /* format string for not XLV message */
+        int             notvolok;       /* set if not XLV => try data */
+                                /* output results */
+        dev_t           ddev;           /* device for data subvolume */
+        dev_t           logdev;         /* device for log subvolume */
+        dev_t           rtdev;          /* device for realtime subvolume */
+        long long       dsize;          /* size of data subvolume (BBs) */
+        long long       logBBsize;      /* size of log subvolume (BBs) */
+                                        /* (blocks allocated for use as
+                                         * log is stored in mount structure) */
+        long long       logBBstart;     /* start block of log subvolume (BBs) */
+        long long       rtsize;         /* size of realtime subvolume (BBs) */
+        int             dfd;            /* data subvolume file descriptor */
+        int             logfd;          /* log subvolume file descriptor */
+        int             rtfd;           /* realtime subvolume file descriptor */
+} libxfs_init_t;
+
+#define LIBXFS_ISREADONLY      0x0069  /* disallow all mounted filesystems */
+#define LIBXFS_ISINACTIVE      0x6900  /* allow mounted only if mounted ro */
+
+extern char    *progname;
+extern int     libxfs_init (libxfs_init_t *);
+extern int     libxfs_device_to_fd (dev_t);
+extern dev_t   libxfs_device_open (char *, int, int);
+extern void    libxfs_device_zero (dev_t, xfs_daddr_t, uint);
+extern void    libxfs_device_close (dev_t);
+
+/* check or write log footer: specify device, log size in blocks & uuid */
+extern int     libxfs_log_clear (dev_t, xfs_daddr_t, uint, uuid_t *, int);
+
+/* 
+ * Define a user-level mount structure with all we need
+ * in order to make use of the numerous XFS_* macros.
+ */
+struct xfs_inode;
+typedef struct xfs_mount {
+       xfs_sb_t                m_sb;           /* copy of fs superblock */
+       int                     m_bsize;        /* fs logical block size */
+       xfs_agnumber_t          m_agfrotor;     /* last ag where space found */
+       xfs_agnumber_t          m_agirotor;     /* last ag dir inode alloced */
+       uint                    m_rsumlevels;   /* rt summary levels */
+       uint                    m_rsumsize;     /* size of rt summary, bytes */
+       struct xfs_inode        *m_rbmip;       /* pointer to bitmap inode */
+       struct xfs_inode        *m_rsumip;      /* pointer to summary inode */
+       struct xfs_inode        *m_rootip;      /* pointer to root directory */
+       dev_t                   m_dev;
+       dev_t                   m_logdev;
+       dev_t                   m_rtdev;
+       __uint8_t               m_dircook_elog; /* log d-cookie entry bits */
+       __uint8_t               m_blkbit_log;   /* blocklog + NBBY */
+       __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
+       __uint8_t               m_agno_log;     /* log #ag's */
+       __uint8_t               m_agino_log;    /* #bits for agino in inum */
+       __uint16_t              m_inode_cluster_size;/* min inode buf size */
+       uint                    m_blockmask;    /* sb_blocksize-1 */
+       uint                    m_blockwsize;   /* sb_blocksize in words */
+       uint                    m_blockwmask;   /* blockwsize-1 */
+       uint                    m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */
+       uint                    m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */
+       uint                    m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */
+       uint                    m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
+       uint                    m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
+       uint                    m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+       uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
+       uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
+       uint                    m_in_maxlevels; /* XFS_IN_MAXLEVELS */
+       xfs_perag_t             *m_perag;       /* per-ag accounting info */
+       uint                    m_flags;        /* global mount flags */
+       uint                    m_qflags;       /* quota status flags */
+       uint                    m_attroffset;   /* inode attribute offset */
+       int                     m_da_node_ents; /* how many entries in danode */
+       int                     m_ialloc_inos;  /* inodes in inode allocation */
+       int                     m_ialloc_blks;  /* blocks in inode allocation */
+       int                     m_litino;       /* size of inode union area */
+       int                     m_inoalign_mask;/* mask sb_inoalignmt if used */
+       xfs_trans_reservations_t m_reservations;/* precomputed res values */
+       __uint64_t              m_maxicount;    /* maximum inode count */
+       int                     m_dalign;       /* stripe unit */
+       int                     m_swidth;       /* stripe width */
+       int                     m_sinoalign;    /* stripe unit inode alignmnt */
+       int                     m_dir_magicpct; /* 37% of the dir blocksize */
+       __uint8_t               m_dirversion;   /* 1 or 2 */
+       int                     m_dirblksize;   /* directory block sz--bytes */
+       int                     m_dirblkfsbs;   /* directory block sz--fsbs */
+       xfs_dablk_t             m_dirdatablk;   /* blockno of dir data v2 */
+       xfs_dablk_t             m_dirleafblk;   /* blockno of dir non-data v2 */
+       xfs_dablk_t             m_dirfreeblk;   /* blockno of dirfreeindex v2 */
+} xfs_mount_t;
+
+
+extern xfs_mount_t     *libxfs_mount (xfs_mount_t *, xfs_sb_t *,
+                               dev_t, dev_t, dev_t, int);
+extern void    libxfs_mount_common (xfs_mount_t *, xfs_sb_t *);
+extern void    libxfs_umount (xfs_mount_t *);
+extern int     libxfs_rtmount_init (xfs_mount_t *);
+extern void    libxfs_alloc_compute_maxlevels (xfs_mount_t *);
+extern void    libxfs_bmap_compute_maxlevels (xfs_mount_t *, int);
+extern void    libxfs_ialloc_compute_maxlevels (xfs_mount_t *);
+extern void    libxfs_trans_init (xfs_mount_t *);
+
+
+/*
+ * Simple I/O interface
+ */
+typedef struct xfs_buf {
+       xfs_daddr_t     b_blkno;
+       unsigned        b_bcount;
+       dev_t           b_dev;
+       void            *b_fsprivate;
+       void            *b_fsprivate2;
+       void            *b_fsprivate3;
+       char            *b_addr;
+       /* b_addr must be the last field */
+} xfs_buf_t;
+/*
+ * Field accessors for xfs_buf_t.  Each macro expands to a single
+ * parenthesised expression so that it is safe as the sole body of an
+ * unbraced if/else or loop.  Note that XFS_BUF_SET_PTR evaluates its
+ * bp argument twice.
+ */
+#define XFS_BUF_PTR(bp)                        ((bp)->b_addr)
+#define xfs_buf_offset(bp, offset)     (XFS_BUF_PTR(bp) + (offset))
+#define XFS_BUF_ADDR(bp)               ((bp)->b_blkno)
+#define XFS_BUF_COUNT(bp)              ((bp)->b_bcount)
+#define XFS_BUF_TARGET(bp)             ((bp)->b_dev)
+/* set the data pointer and the byte count together, as one expression */
+#define XFS_BUF_SET_PTR(bp,p,cnt)      ((bp)->b_addr = (char *)(p), \
+                                               XFS_BUF_SETCOUNT(bp,cnt))
+#define XFS_BUF_SET_ADDR(bp,blk)       ((bp)->b_blkno = (blk))
+#define XFS_BUF_SETCOUNT(bp,cnt)       ((bp)->b_bcount = (cnt))
+
+/* get/set the three private pointers; the getters cast to the given type */
+#define XFS_BUF_FSPRIVATE(bp,type)     ((type)(bp)->b_fsprivate)
+#define XFS_BUF_SET_FSPRIVATE(bp,val)  ((bp)->b_fsprivate = (void *)(val))
+#define XFS_BUF_FSPRIVATE2(bp,type)    ((type)(bp)->b_fsprivate2)
+#define XFS_BUF_SET_FSPRIVATE2(bp,val) ((bp)->b_fsprivate2 = (void *)(val))
+#define XFS_BUF_FSPRIVATE3(bp,type)    ((type)(bp)->b_fsprivate3)
+#define XFS_BUF_SET_FSPRIVATE3(bp,val) ((bp)->b_fsprivate3 = (void *)(val))
+
+extern xfs_buf_t       *libxfs_getbuf (dev_t, xfs_daddr_t, int);
+extern xfs_buf_t       *libxfs_readbuf (dev_t, xfs_daddr_t, int, int);
+extern xfs_buf_t       *libxfs_getsb (xfs_mount_t *, int);
+extern int     libxfs_readbufr (dev_t, xfs_daddr_t, xfs_buf_t *, int, int);
+extern int     libxfs_writebuf (xfs_buf_t *, int);
+extern int     libxfs_writebuf_int (xfs_buf_t *, int);
+extern void    libxfs_putbuf (xfs_buf_t *);
+
+
+/*
+ * Transaction interface
+ */
+
+typedef struct xfs_log_item {
+       struct xfs_log_item_desc        *li_desc;       /* ptr to current desc*/
+       struct xfs_mount                *li_mountp;     /* ptr to fs mount */
+       uint                            li_type;        /* item type */
+} xfs_log_item_t;
+
+typedef struct xfs_inode_log_item {
+       xfs_log_item_t          ili_item;               /* common portion */
+       struct xfs_inode        *ili_inode;             /* inode pointer */
+       unsigned short          ili_flags;              /* misc flags */
+       unsigned int            ili_last_fields;        /* fields when flushed*/
+       xfs_inode_log_format_t  ili_format;             /* logged structure */
+} xfs_inode_log_item_t;
+
+typedef struct xfs_buf_log_item {
+       xfs_log_item_t          bli_item;       /* common item structure */
+       struct xfs_buf          *bli_buf;       /* real buffer pointer */
+       unsigned int            bli_flags;      /* misc flags */
+       unsigned int            bli_recur;      /* recursion count */
+       xfs_buf_log_format_t    bli_format;     /* in-log header */
+} xfs_buf_log_item_t;
+
+#include <xfs_trans.h>
+
+typedef struct xfs_trans {
+       unsigned int    t_type;                 /* transaction type */
+       xfs_mount_t     *t_mountp;              /* ptr to fs mount struct */
+       unsigned int    t_flags;                /* misc flags */
+       long            t_icount_delta;         /* superblock icount change */
+       long            t_ifree_delta;          /* superblock ifree change */
+       long            t_fdblocks_delta;       /* superblock fdblocks chg */
+       long            t_frextents_delta;      /* superblock freextents chg */
+       unsigned int    t_items_free;           /* log item descs free */
+       xfs_log_item_chunk_t    t_items;        /* first log item desc chunk */
+} xfs_trans_t;
+
+extern xfs_trans_t     *libxfs_trans_alloc (xfs_mount_t *, int);
+extern xfs_trans_t     *libxfs_trans_dup (xfs_trans_t *);
+extern int     libxfs_trans_reserve (xfs_trans_t *, uint,uint,uint,uint,uint);
+extern int     libxfs_trans_commit (xfs_trans_t *, uint, xfs_lsn_t *);
+extern void    libxfs_trans_cancel (xfs_trans_t *, int);
+extern void    libxfs_mod_sb (xfs_trans_t *, __int64_t);
+
+extern int     libxfs_trans_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+                               uint, struct xfs_inode **);
+extern void    libxfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint);
+extern void    libxfs_trans_ijoin (xfs_trans_t *, struct xfs_inode *, uint);
+extern void    libxfs_trans_ihold (xfs_trans_t *, struct xfs_inode *);
+extern void    libxfs_trans_log_inode (xfs_trans_t *, struct xfs_inode *,
+                               uint);
+
+extern void    libxfs_trans_brelse (xfs_trans_t *, struct xfs_buf *);
+extern void    libxfs_trans_binval (xfs_trans_t *, struct xfs_buf *);
+extern void    libxfs_trans_bjoin (xfs_trans_t *, struct xfs_buf *);
+extern void    libxfs_trans_bhold (xfs_trans_t *, struct xfs_buf *);
+extern void    libxfs_trans_log_buf (xfs_trans_t *, struct xfs_buf *,
+                               uint, uint);
+extern xfs_buf_t       *libxfs_trans_get_buf (xfs_trans_t *, dev_t,
+                               xfs_daddr_t, int, uint);
+extern int     libxfs_trans_read_buf (xfs_mount_t *, xfs_trans_t *, dev_t,
+                               xfs_daddr_t, int, uint, struct xfs_buf **);
+
+
+/*
+ * Simple memory interface
+ */
+typedef struct xfs_zone {
+       int     zone_unitsize;  /* Size in bytes of zone unit           */
+       char    *zone_name;     /* tag name                             */
+        int     allocated;      /* debug: How many currently allocated  */
+} xfs_zone_t;
+
+extern xfs_zone_t      *libxfs_zone_init (int, char *);
+extern void    *libxfs_zone_zalloc (xfs_zone_t *);
+extern void    libxfs_zone_free (xfs_zone_t *, void *);
+extern void    *libxfs_malloc (size_t);
+extern void    libxfs_free (void *);
+extern void    *libxfs_realloc (void *, size_t);
+
+
+/*
+ * Inode interface
+ */
+struct xfs_inode_log_item;
+typedef struct xfs_inode {
+       xfs_mount_t             *i_mount;       /* fs mount struct ptr */
+       xfs_ino_t               i_ino;          /* inode number (agno/agino) */
+       xfs_daddr_t             i_blkno;        /* blkno of inode buffer */
+       dev_t                   i_dev;          /* dev for this inode */
+       ushort                  i_len;          /* len of inode buffer */
+       ushort                  i_boffset;      /* off of inode in buffer */
+       xfs_ifork_t             *i_afp;         /* attribute fork pointer */
+       xfs_ifork_t             i_df;           /* data fork */
+       struct xfs_trans        *i_transp;      /* ptr to owning transaction */
+       struct xfs_inode_log_item *i_itemp;     /* logging information */
+       unsigned int            i_delayed_blks; /* count of delay alloc blks */
+       xfs_dinode_core_t       i_d;            /* most of ondisk inode */
+} xfs_inode_t;
+
+extern int     libxfs_inode_alloc (xfs_trans_t **, xfs_inode_t *, mode_t,
+                               ushort, dev_t, cred_t *, xfs_inode_t **);
+extern void    libxfs_trans_inode_alloc_buf (xfs_trans_t *, xfs_buf_t *);
+
+extern void    libxfs_idata_realloc (xfs_inode_t *, int, int);
+extern int     libxfs_iread (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+                               xfs_inode_t **, xfs_daddr_t);
+extern void    libxfs_ichgtime (xfs_inode_t *, int);
+extern int     libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *);
+extern int     libxfs_itobp (xfs_mount_t *, xfs_trans_t *, xfs_inode_t *,
+                               xfs_dinode_t **, xfs_buf_t **, xfs_daddr_t);
+extern int     libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t,
+                               uint, xfs_inode_t **, xfs_daddr_t);
+extern void    libxfs_iput (xfs_inode_t *, uint);
+
+
+/*
+ * Directory interface
+ */
+extern void    libxfs_dir_mount (xfs_mount_t *);
+extern void    libxfs_dir2_mount (xfs_mount_t *);
+extern int     libxfs_dir_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *);
+extern int     libxfs_dir2_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *);
+extern int     libxfs_dir_createname (xfs_trans_t *, xfs_inode_t *, char *,
+                               int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir2_createname (xfs_trans_t *, xfs_inode_t *, char *,
+                               int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir_lookup (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t *);
+extern int     libxfs_dir2_lookup (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t *);
+extern int     libxfs_dir_replace (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir2_replace (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir_removename (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir2_removename (xfs_trans_t *, xfs_inode_t *,
+                               char *, int, xfs_ino_t, xfs_fsblock_t *,
+                               xfs_bmap_free_t *, xfs_extlen_t);
+extern int     libxfs_dir_bogus_removename (xfs_trans_t *, xfs_inode_t *,
+                               char *, xfs_fsblock_t *, xfs_bmap_free_t *,
+                               xfs_extlen_t, xfs_dahash_t, int);
+extern int     libxfs_dir2_bogus_removename (xfs_trans_t *, xfs_inode_t *,
+                               char *, xfs_fsblock_t *, xfs_bmap_free_t *,
+                               xfs_extlen_t, xfs_dahash_t, int);
+
+
+/*
+ * Block map interface
+ */
+extern int     libxfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+                               xfs_filblks_t, int, xfs_fsblock_t *,
+                               xfs_extlen_t, xfs_bmbt_irec_t *, int *,
+                               xfs_bmap_free_t *);
+extern int     libxfs_bmap_finish (xfs_trans_t **, xfs_bmap_free_t *,
+                               xfs_fsblock_t, int *);
+extern int     libxfs_bmap_next_offset (xfs_trans_t *, xfs_inode_t *,
+                               xfs_fileoff_t *, int);
+extern int     libxfs_bunmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+                               xfs_filblks_t, int, xfs_extnum_t,
+                               xfs_fsblock_t *, xfs_bmap_free_t *, int *);
+extern void    libxfs_bmap_del_free (xfs_bmap_free_t *,
+                               xfs_bmap_free_item_t *, xfs_bmap_free_item_t *);
+
+
+/*
+ * All other routines we want to keep common...
+ */
+
+extern int     libxfs_highbit32 (__uint32_t);
+extern int     libxfs_highbit64 (__uint64_t);
+extern uint    libxfs_da_log2_roundup (uint);
+
+extern void    libxfs_xlate_sb (void *, xfs_sb_t *, int, xfs_arch_t,
+                               __int64_t);
+extern void    libxfs_xlate_dinode_core (xfs_caddr_t buf,
+                               xfs_dinode_core_t *, int, xfs_arch_t);
+
+extern int     libxfs_alloc_fix_freelist (xfs_alloc_arg_t *, int);
+extern int     libxfs_alloc_file_space (xfs_inode_t *, xfs_off_t,
+                               xfs_off_t, int, int);
+
+extern xfs_dahash_t    libxfs_da_hashname (char *, int);
+extern int     libxfs_attr_leaf_newentsize (xfs_da_args_t *, int, int *);
+
+extern xfs_filblks_t   libxfs_bmbt_get_blockcount (xfs_bmbt_rec_t *);
+extern xfs_fileoff_t   libxfs_bmbt_get_startoff (xfs_bmbt_rec_t *);
+extern void    libxfs_bmbt_get_all (xfs_bmbt_rec_t *, xfs_bmbt_irec_t *);
+
+extern int     libxfs_free_extent (xfs_trans_t *, xfs_fsblock_t, xfs_extlen_t);
+extern int     libxfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t,
+                               xfs_extlen_t);
+
+/* Directory/Attribute routines used by xfs_repair */
+extern void    libxfs_da_bjoin (xfs_trans_t *, xfs_dabuf_t *);
+extern int     libxfs_da_shrink_inode (xfs_da_args_t *, xfs_dablk_t,
+                               xfs_dabuf_t *);
+extern int     libxfs_da_grow_inode (xfs_da_args_t *, xfs_dablk_t *);
+extern void    libxfs_da_bhold (xfs_trans_t *, xfs_dabuf_t *);
+extern void    libxfs_da_brelse (xfs_trans_t *, xfs_dabuf_t *);
+extern int     libxfs_da_read_bufr (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t,
+                               xfs_daddr_t, xfs_dabuf_t **, int);
+extern int     libxfs_da_read_buf (xfs_trans_t *, xfs_inode_t *,
+                               xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int);
+extern int     libxfs_da_get_buf (xfs_trans_t *, xfs_inode_t *,
+                               xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int);
+extern void    libxfs_da_log_buf (xfs_trans_t *, xfs_dabuf_t *, uint, uint);
+extern int     libxfs_dir2_shrink_inode (xfs_da_args_t *, xfs_dir2_db_t,
+                               xfs_dabuf_t *);
+extern int     libxfs_dir2_grow_inode (xfs_da_args_t *, int, xfs_dir2_db_t *);
+extern int     libxfs_dir2_isleaf (xfs_trans_t *, xfs_inode_t *, int *);
+extern int     libxfs_dir2_isblock (xfs_trans_t *, xfs_inode_t *, int *);
+extern void    libxfs_dir2_data_use_free (xfs_trans_t *, xfs_dabuf_t *,
+                               xfs_dir2_data_unused_t *, xfs_dir2_data_aoff_t,
+                               xfs_dir2_data_aoff_t, int *, int *);
+extern void    libxfs_dir2_data_make_free (xfs_trans_t *, xfs_dabuf_t *,
+                               xfs_dir2_data_aoff_t, xfs_dir2_data_aoff_t,
+                               int *, int *);
+extern void    libxfs_dir2_data_log_entry (xfs_trans_t *, xfs_dabuf_t *,
+                               xfs_dir2_data_entry_t *);
+extern void    libxfs_dir2_data_log_header (xfs_trans_t *, xfs_dabuf_t *);
+extern void    libxfs_dir2_data_freescan (xfs_mount_t *, xfs_dir2_data_t *,
+                               int *, char *);
+extern void    libxfs_dir2_free_log_bests (xfs_trans_t *, xfs_dabuf_t *,
+                               int, int);
+
+/* Shared utility routines */
+extern unsigned int    libxfs_log2_roundup(unsigned int i);
+
+
+/* ick */
+extern __inline__ __const__ __u64 __fswab64 (__u64 x);
+
+#endif /* __LIBXFS_H__ */
diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in
new file mode 100644 (file)
index 0000000..9f34375
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *
+ * @configure_input@
+ */
+#ifndef __XFS_PLATFORM_DEFS_H__
+#define __XFS_PLATFORM_DEFS_H__
+
+#include <stdio.h>
+#include <assert.h>
+#include <endian.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/types.h>
+
+#if (__GLIBC__ <= 2) && (__GLIBC_MINOR__ <= 1)
+# define constpp       const char * const *
+#else
+# define constpp       char * const *
+#endif
+
+typedef loff_t         xfs_off_t;
+typedef __uint64_t     xfs_ino_t;
+typedef __uint32_t     xfs_dev_t;
+typedef __int64_t      xfs_daddr_t;
+typedef char*          xfs_caddr_t;
+
+/* long and pointer must be either 32 bit or 64 bit */
+#undef HAVE_64BIT_LONG
+#undef HAVE_32BIT_LONG
+#undef HAVE_32BIT_PTR
+#undef HAVE_64BIT_PTR
+
+/* Check if __psint_t is set to something meaningful */
+#undef HAVE___PSINT_T
+#ifndef HAVE___PSINT_T
+# ifdef HAVE_32BIT_PTR
+typedef int __psint_t;
+# elif defined HAVE_64BIT_PTR
+#  ifdef HAVE_64BIT_LONG
+typedef long __psint_t;
+#  else
+/* This is a very strange architecture, which has 64 bit pointers but
+ * not 64 bit longs. So, I'd just punt here and assume long long is Ok */
+typedef long long __psint_t;
+#  endif
+# else
+#  error Unknown pointer size
+# endif
+#endif
+
+/*
+ * Check if __psunsigned_t is set to something meaningful; if not, define
+ * it as an unsigned integer type chosen from the HAVE_*BIT_PTR and
+ * HAVE_64BIT_LONG settings above, mirroring the signed __psint_t choice.
+ */
+#undef HAVE___PSUNSIGNED_T
+#ifndef HAVE___PSUNSIGNED_T
+# ifdef HAVE_32BIT_PTR
+typedef unsigned int __psunsigned_t;
+# elif defined HAVE_64BIT_PTR
+#  ifdef HAVE_64BIT_LONG
+typedef unsigned long __psunsigned_t;
+#  else
+/* This is a very strange architecture, which has 64 bit pointers but
+ * not 64 bit longs. So, I'd just punt here and assume long long is Ok */
+typedef unsigned long long __psunsigned_t;
+#  endif
+# else
+#  error Unknown pointer size
+# endif
+#endif
+
+#ifdef DEBUG
+# define ASSERT                assert
+#else
+# define ASSERT(EX)    ((void) 0)
+#endif
+
+#endif /* __XFS_PLATFORM_DEFS_H__ */
diff --git a/include/xfs_ag.h b/include/xfs_ag.h
new file mode 100644 (file)
index 0000000..86e4095
--- /dev/null
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_AG_H__
+#define        __XFS_AG_H__
+
+/*
+ * Allocation group header
+ * This is divided into three structures, placed in sequential 512-byte 
+ * buffers after a copy of the superblock (also in a 512-byte buffer).
+ */
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define        XFS_AGF_MAGIC   0x58414746      /* 'XAGF' */
+#define        XFS_AGI_MAGIC   0x58414749      /* 'XAGI' */
+#define        XFS_AGF_VERSION 1
+#define        XFS_AGI_VERSION 1
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION)
+int xfs_agf_good_version(unsigned v);
+#define        XFS_AGF_GOOD_VERSION(v) xfs_agf_good_version(v)
+#else
+#define XFS_AGF_GOOD_VERSION(v)                ((v) == XFS_AGF_VERSION)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION)
+int xfs_agi_good_version(unsigned v);
+#define        XFS_AGI_GOOD_VERSION(v) xfs_agi_good_version(v)
+#else
+#define XFS_AGI_GOOD_VERSION(v)                ((v) == XFS_AGI_VERSION)
+#endif
+
+/*
+ * Btree number 0 is bno, 1 is cnt.  This value gives the size of the
+ * arrays below.
+ */
+#define        XFS_BTNUM_AGF   ((int)XFS_BTNUM_CNTi + 1)
+
+/*
+ * The second word of agf_levels in the first a.g. overlaps the EFS
+ * superblock's magic number.  Since the magic numbers valid for EFS
+ * are > 64k, our value cannot be confused for an EFS superblock's.
+ */
+
+typedef struct xfs_agf
+{
+       /*
+        * Common allocation group header information
+        */
+       __uint32_t      agf_magicnum;   /* magic number == XFS_AGF_MAGIC */
+       __uint32_t      agf_versionnum; /* header version == XFS_AGF_VERSION */
+       xfs_agnumber_t  agf_seqno;      /* sequence # starting from 0 */
+       xfs_agblock_t   agf_length;     /* size in blocks of a.g. */
+       /*
+        * Freespace information
+        */
+       xfs_agblock_t   agf_roots[XFS_BTNUM_AGF];       /* root blocks */
+       __uint32_t      agf_spare0;     /* spare field */
+       __uint32_t      agf_levels[XFS_BTNUM_AGF];      /* btree levels */
+       __uint32_t      agf_spare1;     /* spare field */
+       __uint32_t      agf_flfirst;    /* first freelist block's index */
+       __uint32_t      agf_fllast;     /* last freelist block's index */
+       __uint32_t      agf_flcount;    /* count of blocks in freelist */
+       xfs_extlen_t    agf_freeblks;   /* total free blocks */
+       xfs_extlen_t    agf_longest;    /* longest free space */
+} xfs_agf_t;
+
+#define        XFS_AGF_MAGICNUM        0x00000001
+#define        XFS_AGF_VERSIONNUM      0x00000002
+#define        XFS_AGF_SEQNO           0x00000004
+#define        XFS_AGF_LENGTH          0x00000008
+#define        XFS_AGF_ROOTS           0x00000010
+#define        XFS_AGF_LEVELS          0x00000020
+#define        XFS_AGF_FLFIRST         0x00000040
+#define        XFS_AGF_FLLAST          0x00000080
+#define        XFS_AGF_FLCOUNT         0x00000100
+#define        XFS_AGF_FREEBLKS        0x00000200
+#define        XFS_AGF_LONGEST         0x00000400
+#define        XFS_AGF_NUM_BITS        11
+#define        XFS_AGF_ALL_BITS        ((1 << XFS_AGF_NUM_BITS) - 1)
+
+/* disk block (xfs_daddr_t) in the AG */
+#define        XFS_AGF_DADDR           ((xfs_daddr_t)1)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK)
+xfs_agblock_t xfs_agf_block(struct xfs_mount *mp);
+#define        XFS_AGF_BLOCK(mp)       xfs_agf_block(mp)
+#else
+#define        XFS_AGF_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGF_DADDR)
+#endif
+
+/*
+ * Size of the unlinked inode hash table in the agi.
+ */
+#define        XFS_AGI_UNLINKED_BUCKETS        64
+
+typedef struct xfs_agi
+{
+       /*
+        * Common allocation group header information
+        */
+       __uint32_t      agi_magicnum;   /* magic number == XFS_AGI_MAGIC */
+       __uint32_t      agi_versionnum; /* header version == XFS_AGI_VERSION */
+       xfs_agnumber_t  agi_seqno;      /* sequence # starting from 0 */
+       xfs_agblock_t   agi_length;     /* size in blocks of a.g. */
+       /*
+        * Inode information
+        * Inodes are mapped by interpreting the inode number, so no
+        * mapping data is needed here.
+        */
+       xfs_agino_t     agi_count;      /* count of allocated inodes */
+       xfs_agblock_t   agi_root;       /* root of inode btree */
+       __uint32_t      agi_level;      /* levels in inode btree */
+       xfs_agino_t     agi_freecount;  /* number of free inodes */
+       xfs_agino_t     agi_newino;     /* new inode just allocated */
+       xfs_agino_t     agi_dirino;     /* last directory inode chunk */
+       /*
+        * Hash table of inodes which have been unlinked but are
+        * still being referenced.
+        */
+       xfs_agino_t     agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
+} xfs_agi_t;
+
+#define        XFS_AGI_MAGICNUM        0x00000001
+#define        XFS_AGI_VERSIONNUM      0x00000002
+#define        XFS_AGI_SEQNO           0x00000004
+#define        XFS_AGI_LENGTH          0x00000008
+#define        XFS_AGI_COUNT           0x00000010
+#define        XFS_AGI_ROOT            0x00000020
+#define        XFS_AGI_LEVEL           0x00000040
+#define        XFS_AGI_FREECOUNT       0x00000080
+#define        XFS_AGI_NEWINO          0x00000100
+#define        XFS_AGI_DIRINO          0x00000200
+#define        XFS_AGI_UNLINKED        0x00000400
+#define        XFS_AGI_NUM_BITS        11
+#define        XFS_AGI_ALL_BITS        ((1 << XFS_AGI_NUM_BITS) - 1)
+
+/* disk block (xfs_daddr_t) in the AG */
+#define        XFS_AGI_DADDR           ((xfs_daddr_t)2)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK)
+xfs_agblock_t xfs_agi_block(struct xfs_mount *mp);
+#define        XFS_AGI_BLOCK(mp)       xfs_agi_block(mp)
+#else
+#define        XFS_AGI_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGI_DADDR)
+#endif
+
+/*
+ * The third a.g. block contains the a.g. freelist, an array 
+ * of block pointers to blocks owned by the allocation btree code.
+ */
+#define        XFS_AGFL_DADDR          ((xfs_daddr_t)3)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK)
+xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp);
+#define        XFS_AGFL_BLOCK(mp)      xfs_agfl_block(mp)
+#else
+#define        XFS_AGFL_BLOCK(mp)      XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR)
+#endif
+#define        XFS_AGFL_SIZE           (BBSIZE / sizeof(xfs_agblock_t))
+typedef        struct xfs_agfl
+{
+       xfs_agblock_t   agfl_bno[XFS_AGFL_SIZE];
+} xfs_agfl_t;
+
+/*
+ * Per-ag incore structure, copies of information in agf and agi,
+ * to improve the performance of allocation group selection.
+ */
+typedef struct xfs_perag
+{
+       char            pagf_init;      /* this agf's entry is initialized */
+       char            pagi_init;      /* this agi's entry is initialized */
+       __uint8_t       pagf_levels[XFS_BTNUM_AGF];
+                                       /* # of levels in bno & cnt btree */
+       __uint32_t      pagf_flcount;   /* count of blocks in freelist */
+       xfs_extlen_t    pagf_freeblks;  /* total free blocks */
+       xfs_extlen_t    pagf_longest;   /* longest free space */
+       xfs_agino_t     pagi_freecount; /* number of free inodes */
+} xfs_perag_t;
+
+#define        XFS_AG_MIN_BYTES        (1LL << 24)     /* 16 MB */
+#define        XFS_AG_BEST_BYTES       (1LL << 30)     /*  1 GB */
+#define        XFS_AG_MAX_BYTES        (1LL << 32)     /*  4 GB */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MIN_BLOCKS)
+xfs_extlen_t xfs_ag_min_blocks(int bl);
+#define        XFS_AG_MIN_BLOCKS(bl)           xfs_ag_min_blocks(bl)
+#else
+#define        XFS_AG_MIN_BLOCKS(bl)   ((xfs_extlen_t)(XFS_AG_MIN_BYTES >> (bl)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_BEST_BLOCKS)
+xfs_extlen_t xfs_ag_best_blocks(int bl);
+#define        XFS_AG_BEST_BLOCKS(bl)          xfs_ag_best_blocks(bl)
+#else
+#define        XFS_AG_BEST_BLOCKS(bl)  ((xfs_extlen_t)(XFS_AG_BEST_BYTES >> (bl)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAX_BLOCKS)
+xfs_extlen_t xfs_ag_max_blocks(int bl);
+#define        XFS_AG_MAX_BLOCKS(bl)           xfs_ag_max_blocks(bl)
+#else
+#define        XFS_AG_MAX_BLOCKS(bl)   ((xfs_extlen_t)(XFS_AG_MAX_BYTES >> (bl)))
+#endif
+
+#define        XFS_MAX_AGNUMBER        ((xfs_agnumber_t)(NULLAGNUMBER - 1))
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS)
+int xfs_ag_maxlevels(struct xfs_mount *mp);
+#define        XFS_AG_MAXLEVELS(mp)            xfs_ag_maxlevels(mp)
+#else
+#define        XFS_AG_MAXLEVELS(mp)    ((mp)->m_ag_maxlevels)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST)
+int xfs_min_freelist(xfs_agf_t *a, struct xfs_mount *mp);
+#define        XFS_MIN_FREELIST(a,mp)          xfs_min_freelist(a,mp)
+#else
+#define        XFS_MIN_FREELIST(a,mp)  \
+       XFS_MIN_FREELIST_RAW(   \
+               INT_GET((a)->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT), \
+               INT_GET((a)->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT), mp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG)
+int xfs_min_freelist_pag(xfs_perag_t *pag, struct xfs_mount *mp);
+#define        XFS_MIN_FREELIST_PAG(pag,mp)    xfs_min_freelist_pag(pag,mp)
+#else
+#define        XFS_MIN_FREELIST_PAG(pag,mp)    \
+       XFS_MIN_FREELIST_RAW((pag)->pagf_levels[XFS_BTNUM_BNOi], \
+                            (pag)->pagf_levels[XFS_BTNUM_CNTi], mp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW)
+int xfs_min_freelist_raw(int bl, int cl, struct xfs_mount *mp);
+#define        XFS_MIN_FREELIST_RAW(bl,cl,mp)  xfs_min_freelist_raw(bl,cl,mp)
+#else
+#define        XFS_MIN_FREELIST_RAW(bl,cl,mp)  \
+       (MIN((bl) + 1, XFS_AG_MAXLEVELS(mp)) + \
+        MIN((cl) + 1, XFS_AG_MAXLEVELS(mp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB)
+xfs_fsblock_t xfs_agb_to_fsb(struct xfs_mount *mp, xfs_agnumber_t agno,
+                            xfs_agblock_t agbno);
+#define XFS_AGB_TO_FSB(mp,agno,agbno)  xfs_agb_to_fsb(mp,agno,agbno)
+#else
+#define        XFS_AGB_TO_FSB(mp,agno,agbno) \
+       (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO)
+xfs_agnumber_t xfs_fsb_to_agno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+#define        XFS_FSB_TO_AGNO(mp,fsbno)       xfs_fsb_to_agno(mp,fsbno)
+#else
+#define        XFS_FSB_TO_AGNO(mp,fsbno) \
+       ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO)
+xfs_agblock_t xfs_fsb_to_agbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+#define        XFS_FSB_TO_AGBNO(mp,fsbno)      xfs_fsb_to_agbno(mp,fsbno)
+#else
+#define        XFS_FSB_TO_AGBNO(mp,fsbno) \
+       ((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR)
+xfs_daddr_t xfs_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno,
+                        xfs_agblock_t agbno);
+#define        XFS_AGB_TO_DADDR(mp,agno,agbno) xfs_agb_to_daddr(mp,agno,agbno)
+#else
+#define        XFS_AGB_TO_DADDR(mp,agno,agbno) \
+       ((xfs_daddr_t)(XFS_FSB_TO_BB(mp, \
+               (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))))
+#endif
+/*
+ * XFS_DADDR_TO_AGNO and XFS_DADDR_TO_AGBNO moved to xfs_mount.h
+ * to avoid header file ordering change
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR)
+xfs_daddr_t xfs_ag_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_daddr_t d);
+#define        XFS_AG_DADDR(mp,agno,d)         xfs_ag_daddr(mp,agno,d)
+#else
+#define        XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF)
+xfs_agf_t *xfs_buf_to_agf(struct xfs_buf *bp);
+#define        XFS_BUF_TO_AGF(bp)              xfs_buf_to_agf(bp)
+#else
+#define        XFS_BUF_TO_AGF(bp)      ((xfs_agf_t *)XFS_BUF_PTR(bp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI)
+xfs_agi_t *xfs_buf_to_agi(struct xfs_buf *bp);
+#define        XFS_BUF_TO_AGI(bp)              xfs_buf_to_agi(bp)
+#else
+#define        XFS_BUF_TO_AGI(bp)      ((xfs_agi_t *)XFS_BUF_PTR(bp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL)
+xfs_agfl_t *xfs_buf_to_agfl(struct xfs_buf *bp);
+#define        XFS_BUF_TO_AGFL(bp)             xfs_buf_to_agfl(bp)
+#else
+#define        XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#endif
+
+/*
+ * For checking for bad ranges of xfs_daddr_t's, covering multiple
+ * allocation groups or a single xfs_daddr_t that's a superblock copy.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR)
+void xfs_ag_check_daddr(struct xfs_mount *mp, xfs_daddr_t d, xfs_extlen_t len);
+#define        XFS_AG_CHECK_DADDR(mp,d,len)    xfs_ag_check_daddr(mp,d,len)
+#else
+#define        XFS_AG_CHECK_DADDR(mp,d,len)    \
+       ((len) == 1 ? \
+           ASSERT((d) == XFS_SB_DADDR || \
+                  XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \
+           ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \
+                  XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1)))
+#endif
+
+#endif /* __XFS_AG_H__ */
diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h
new file mode 100644 (file)
index 0000000..55a2efa
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ALLOC_H__
+#define        __XFS_ALLOC_H__
+
+struct xfs_buf;
+struct xfs_mount;
+struct xfs_perag;
+struct xfs_trans;
+
+/*
+ * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
+ */
+typedef enum xfs_alloctype
+{
+       XFS_ALLOCTYPE_ANY_AG,           /* allocate anywhere, use rotor */
+       XFS_ALLOCTYPE_FIRST_AG,         /* ... start at ag 0 */
+       XFS_ALLOCTYPE_START_AG,         /* anywhere, start in this a.g. */
+       XFS_ALLOCTYPE_THIS_AG,          /* anywhere in this a.g. */
+       XFS_ALLOCTYPE_START_BNO,        /* near this block else anywhere */
+       XFS_ALLOCTYPE_NEAR_BNO,         /* in this a.g. and near this block */
+       XFS_ALLOCTYPE_THIS_BNO          /* at exactly this block */
+} xfs_alloctype_t;
+
+/*
+ * Flags for xfs_alloc_fix_freelist.
+ */
+#define        XFS_ALLOC_FLAG_TRYLOCK  0x00000001  /* use trylock for buffer locking */
+
+/*
+ * Argument structure for xfs_alloc routines.
+ * This is turned into a structure to avoid having 20 arguments passed
+ * down several levels of the stack.
+ */
+typedef struct xfs_alloc_arg {
+       struct xfs_trans *tp;           /* transaction pointer */
+       struct xfs_mount *mp;           /* file system mount point */
+       struct xfs_buf  *agbp;          /* buffer for a.g. freelist header */
+       struct xfs_perag *pag;          /* per-ag struct for this agno */
+       xfs_fsblock_t   fsbno;          /* file system block number */
+       xfs_agnumber_t  agno;           /* allocation group number */
+       xfs_agblock_t   agbno;          /* allocation group-relative block # */
+       xfs_extlen_t    minlen;         /* minimum size of extent */
+       xfs_extlen_t    maxlen;         /* maximum size of extent */
+       xfs_extlen_t    mod;            /* mod value for extent size */
+       xfs_extlen_t    prod;           /* prod value for extent size */
+       xfs_extlen_t    minleft;        /* min blocks must be left after us */
+       xfs_extlen_t    total;          /* total blocks needed in xaction */
+       xfs_extlen_t    alignment;      /* align answer to multiple of this */
+       xfs_extlen_t    minalignslop;   /* slop for minlen+alignment calcs */
+       xfs_extlen_t    len;            /* output: actual size of extent */
+       xfs_alloctype_t type;           /* allocation type XFS_ALLOCTYPE_... */
+       xfs_alloctype_t otype;          /* original allocation type */
+       char            wasdel;         /* set if allocation was prev delayed */
+       char            wasfromfl;      /* set if allocation is from freelist */
+       char            isfl;           /* set if is freelist blocks - !actg */
+       char            userdata;       /* set if this is user data */
+} xfs_alloc_arg_t;
+
+
+#ifdef __KERNEL__
+
+/*
+ * Types for alloc tracing.
+ */
+#define        XFS_ALLOC_KTRACE_ALLOC  1
+#define        XFS_ALLOC_KTRACE_FREE   2
+#define        XFS_ALLOC_KTRACE_MODAGF 3
+/*
+ * Allocation tracing buffer size.
+ */
+#define        XFS_ALLOC_TRACE_SIZE    4096
+
+#ifdef XFS_ALL_TRACE
+#define        XFS_ALLOC_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_ALLOC_TRACE
+#endif
+
+/*
+ * Prototypes for visible xfs_alloc.c routines
+ */
+
+/*
+ * Compute and fill in value of m_ag_maxlevels.
+ */
+void
+xfs_alloc_compute_maxlevels(
+       struct xfs_mount        *mp);   /* file system mount structure */
+
+/*
+ * Decide whether to use this allocation group for this allocation.
+ * If so, fix up the btree freelist's size.
+ * This is external so mkfs can call it, too.
+ */
+int                            /* error */
+xfs_alloc_fix_freelist(
+       xfs_alloc_arg_t *args,  /* allocation argument structure */
+       int             flags); /* XFS_ALLOC_FLAG_... */
+
+/*
+ * Get a block from the freelist.
+ * Returns with the buffer for the block gotten.
+ */
+int                            /* error */
+xfs_alloc_get_freelist(
+       struct xfs_trans *tp,   /* transaction pointer */
+       struct xfs_buf  *agbp,  /* buffer containing the agf structure */
+       xfs_agblock_t   *bnop); /* block address retrieved from freelist */
+
+/*
+ * Log the given fields from the agf structure.
+ */
+void
+xfs_alloc_log_agf(
+       struct xfs_trans *tp,   /* transaction pointer */
+       struct xfs_buf  *bp,    /* buffer for a.g. freelist header */
+       int             fields);/* mask of fields to be logged (XFS_AGF_...) */
+
+/*
+ * Interface for inode allocation to force the pag data to be initialized.
+ */
+int                            /* error */
+xfs_alloc_pagf_init(
+       struct xfs_mount *mp,   /* file system mount structure */
+       struct xfs_trans *tp,   /* transaction pointer */
+       xfs_agnumber_t  agno,   /* allocation group number */
+       int             flags); /* XFS_ALLOC_FLAG_... */
+
+/*
+ * Put the block on the freelist for the allocation group.
+ */
+int                            /* error */
+xfs_alloc_put_freelist(
+       struct xfs_trans *tp,   /* transaction pointer */
+       struct xfs_buf  *agbp,  /* buffer for a.g. freelist header */
+       struct xfs_buf  *agflbp,/* buffer for a.g. free block array */
+       xfs_agblock_t   bno);   /* block being freed */
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int                                    /* error  */
+xfs_alloc_read_agf(
+       struct xfs_mount *mp,           /* mount point structure */
+       struct xfs_trans *tp,           /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       int             flags,          /* XFS_ALLOC_FLAG_... */
+       struct xfs_buf  **bpp);         /* buffer for the ag freelist header */
+
+/*
+ * Allocate an extent (variable-size).
+ */
+int                            /* error */
+xfs_alloc_vextent(
+       xfs_alloc_arg_t *args); /* allocation argument structure */
+
+/*
+ * Free an extent.
+ */
+int                            /* error */
+xfs_free_extent(
+       struct xfs_trans *tp,   /* transaction pointer */
+       xfs_fsblock_t   bno,    /* starting block number of extent */
+       xfs_extlen_t    len);   /* length of extent */
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_ALLOC_H__ */
diff --git a/include/xfs_alloc_btree.h b/include/xfs_alloc_btree.h
new file mode 100644 (file)
index 0000000..7cd1a87
--- /dev/null
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ALLOC_BTREE_H__
+#define        __XFS_ALLOC_BTREE_H__
+
+/*
+ * Freespace on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_btree_sblock;
+struct xfs_mount;
+
+/*
+ * There are two on-disk btrees, one sorted by blockno and one sorted
+ * by blockcount and blockno.  All blocks look the same to make the code
+ * simpler; if we have time later, we'll make the optimizations.
+ */
+#define        XFS_ABTB_MAGIC  0x41425442      /* 'ABTB' for bno tree */
+#define        XFS_ABTC_MAGIC  0x41425443      /* 'ABTC' for cnt tree */
+
+/*
+ * Data record/key structure
+ */
+typedef struct xfs_alloc_rec
+{
+       xfs_agblock_t   ar_startblock;  /* starting block number */
+       xfs_extlen_t    ar_blockcount;  /* count of free blocks */
+} xfs_alloc_rec_t, xfs_alloc_key_t;
+
+typedef xfs_agblock_t xfs_alloc_ptr_t; /* btree pointer type */
+                                       /* btree block header type */
+typedef        struct xfs_btree_sblock xfs_alloc_block_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK)
+xfs_alloc_block_t *xfs_buf_to_alloc_block(struct xfs_buf *bp);
+#define        XFS_BUF_TO_ALLOC_BLOCK(bp)      xfs_buf_to_alloc_block(bp)
+#else
+#define        XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Real block structures have a size equal to the disk block size.
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE)
+int xfs_alloc_block_size(int lev, struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_BLOCK_SIZE(lev,cur)   xfs_alloc_block_size(lev,cur)
+#else
+#define        XFS_ALLOC_BLOCK_SIZE(lev,cur)   (1 << (cur)->bc_blocklog)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS)
+int xfs_alloc_block_maxrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_BLOCK_MAXRECS(lev,cur)        xfs_alloc_block_maxrecs(lev,cur)
+#else
+#define        XFS_ALLOC_BLOCK_MAXRECS(lev,cur)        \
+       ((cur)->bc_mp->m_alloc_mxr[(lev) != 0])
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS)
+int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_BLOCK_MINRECS(lev,cur)        xfs_alloc_block_minrecs(lev,cur)
+#else
+#define        XFS_ALLOC_BLOCK_MINRECS(lev,cur)        \
+       ((cur)->bc_mp->m_alloc_mnr[(lev) != 0])
+#endif
+
+/*
+ * Minimum and maximum blocksize.
+ * The blocksize upper limit is pretty much arbitrary.
+ */
+#define        XFS_MIN_BLOCKSIZE_LOG   9       /* i.e. 512 bytes */
+#define        XFS_MAX_BLOCKSIZE_LOG   16      /* i.e. 65536 bytes */
+#define        XFS_MIN_BLOCKSIZE       (1 << XFS_MIN_BLOCKSIZE_LOG)
+#define        XFS_MAX_BLOCKSIZE       (1 << XFS_MAX_BLOCKSIZE_LOG)
+
+/*
+ * block numbers in the AG; SB is BB 0, AGF is BB 1, AGI is BB 2, AGFL is BB 3
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK)
+xfs_agblock_t xfs_bno_block(struct xfs_mount *mp);
+#define        XFS_BNO_BLOCK(mp)       xfs_bno_block(mp)
+#else
+#define        XFS_BNO_BLOCK(mp)       ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK)
+xfs_agblock_t xfs_cnt_block(struct xfs_mount *mp);
+#define        XFS_CNT_BLOCK(mp)       xfs_cnt_block(mp)
+#else
+#define        XFS_CNT_BLOCK(mp)       ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
+#endif
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR)
+xfs_alloc_rec_t *xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i,
+                                   struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_REC_ADDR(bb,i,cur)    xfs_alloc_rec_addr(bb,i,cur)
+#else
+#define        XFS_ALLOC_REC_ADDR(bb,i,cur)    \
+       XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, bb, i, \
+               XFS_ALLOC_BLOCK_MAXRECS(0, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR)
+xfs_alloc_key_t *xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i,
+                                   struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_KEY_ADDR(bb,i,cur)    xfs_alloc_key_addr(bb,i,cur)
+#else
+#define        XFS_ALLOC_KEY_ADDR(bb,i,cur)    \
+       XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+               XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR)
+xfs_alloc_ptr_t *xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i,
+                                   struct xfs_btree_cur *cur);
+#define        XFS_ALLOC_PTR_ADDR(bb,i,cur)    xfs_alloc_ptr_addr(bb,i,cur)
+#else
+#define        XFS_ALLOC_PTR_ADDR(bb,i,cur)    \
+       XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \
+               XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+#endif
+
+/*
+ * Prototypes for externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_alloc_decrement(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat); /* success/failure */
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int                                    /* error */
+xfs_alloc_delete(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     *stat); /* success/failure */
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_alloc_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           *bno,   /* output: starting block of extent */
+       xfs_extlen_t            *len,   /* output: length of extent */
+       int                     *stat); /* output: success/failure */
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_alloc_increment(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat); /* success/failure */
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int                                    /* error */
+xfs_alloc_insert(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_eq(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_ge(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat); /* success/failure */
+/*
+ * Update the record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int                                    /* error */
+xfs_alloc_update(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len);   /* length of extent */
+
+#endif /* __XFS_ALLOC_BTREE_H__ */
diff --git a/include/xfs_arch.h b/include/xfs_arch.h
new file mode 100644 (file)
index 0000000..9a01381
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ARCH_H__
+#define __XFS_ARCH_H__
+
+#ifndef XFS_BIG_FILESYSTEMS
+#error XFS_BIG_FILESYSTEMS must be defined true or false
+#endif
+    
+#define DIRINO4_GET_ARCH(pointer,arch) /* 32-bit read via the unaligned accessors */ \
+    ( ((arch) != ARCH_NOCONVERT) \
+        ? /* conversion requested: use the _BE accessor */ \
+            (INT_GET_UNALIGNED_32_BE(pointer)) \
+        : /* ARCH_NOCONVERT: use the plain accessor */ \
+            (INT_GET_UNALIGNED_32(pointer)) \
+    )
+    
+#if XFS_BIG_FILESYSTEMS
+#define DIRINO_GET_ARCH(pointer,arch) /* 64-bit read via the unaligned accessors */ \
+    ( ((arch) == ARCH_NOCONVERT) \
+        ? \
+            (INT_GET_UNALIGNED_64(pointer)) \
+        : \
+            (INT_GET_UNALIGNED_64_BE(pointer)) \
+    )
+#else
+/* MACHINE ARCHITECTURE dependent: only 32 bits are read, host byte order picks the offset */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define DIRINO_GET_ARCH(pointer,arch) /* cast the whole argument, then step 4 bytes */ \
+    DIRINO4_GET_ARCH((((__u8 *)(pointer)) + 4),arch)
+#else
+#define DIRINO_GET_ARCH(pointer,arch) \
+    DIRINO4_GET_ARCH(pointer,arch)
+#endif /* __BYTE_ORDER == __LITTLE_ENDIAN */
+#endif /* XFS_BIG_FILESYSTEMS */
+
+#define DIRINO_COPY_ARCH(from,to,arch) /* verbatim copy of sizeof(xfs_ino_t) bytes, or 64-bit swap */ \
+    do { if ((arch) == ARCH_NOCONVERT) { \
+        bcopy((from),(to),sizeof(xfs_ino_t)); \
+    } else { \
+        INT_SWAP_UNALIGNED_64(from,to); \
+    } } while (0) /* one statement, so it is safe inside an unbraced if/else */
+#define DIRINO4_COPY_ARCH(from,to,arch) /* verbatim copy from byte offset 4, or 32-bit swap */ \
+    do { if ((arch) == ARCH_NOCONVERT) { \
+        bcopy((((__u8 *)(from)) + 4),(to),sizeof(xfs_dir2_ino4_t)); \
+    } else { \
+        INT_SWAP_UNALIGNED_32(from,to); \
+    } } while (0) /* one statement, so it is safe inside an unbraced if/else */
+
+#endif /* __XFS_ARCH_H__ */
diff --git a/include/xfs_attr_leaf.h b/include/xfs_attr_leaf.h
new file mode 100644 (file)
index 0000000..41d63b5
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ATTR_LEAF_H__
+#define        __XFS_ATTR_LEAF_H__
+
+/*
+ * Attribute storage layout, internal structure, access macros, etc.
+ *
+ * Attribute lists are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Attribute names are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of an attribute name may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ */
+
+struct attrlist;
+struct attrlist_cursor_kern;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_inode;
+struct xfs_trans;
+
+/*========================================================================
+ * Attribute structure when equal to XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This is the structure of the leaf nodes in the Btree.
+ *
+ * Struct leaf_entry's are packed from the top.  Name/values grow from the
+ * bottom but are not packed.  The freemap contains run-length-encoded entries
+ * for the free bytes after the leaf_entry's, but only the N largest such,
+ * smaller runs are dropped.  When the freemap doesn't show enough space
+ * for an allocation, we compact the name/value area and try again.  If we
+ * still don't have enough space, then we have to split the block.  The
+ * name/value structs (both local and remote versions) must be 32bit aligned.
+ *
+ * Since we have duplicate hash keys, for each key that matches, compare
+ * the actual name string.  The root and intermediate node search always
+ * takes the first-in-the-block key match found, so we should only have
+ * to work "forw"ard.  If none matches, continue with the "forw"ard leaf
+ * nodes until the hash key changes or the attribute name is found.
+ *
+ * We store the fact that an attribute is a ROOT versus USER attribute in
+ * the leaf_entry.  The namespaces are independent only because we also look
+ * at the root/user bit when we are looking for a matching attribute name.
+ *
+ * We also store an "incomplete" bit in the leaf_entry.  It shows that an
+ * attribute is in the middle of being created and should not be shown to
+ * the user if we crash during the time that the bit is set.  We clear the
+ * bit when we have finished setting up the attribute.  We do this because
+ * we cannot create some large attributes inside a single transaction, and we
+ * need some indication that we weren't finished if we crash in the middle.
+ */
+#define XFS_ATTR_LEAF_MAPSIZE  3       /* how many freespace slots */
+
+typedef struct xfs_attr_leafblock {
+       struct xfs_attr_leaf_hdr {      /* constant-structure header block */
+               xfs_da_blkinfo_t info;  /* block type, links, etc. */
+               __uint16_t count;       /* count of active leaf_entry's */
+               __uint16_t usedbytes;   /* num bytes of names/values stored */
+               __uint16_t firstused;   /* first used byte in name area */
+               __uint8_t  holes;       /* != 0 if blk needs compaction */
+               __uint8_t  pad1;        /* pad byte; NOTE(review): presumably unused like pad2 */
+               struct xfs_attr_leaf_map {        /* RLE map of free bytes */
+                       __uint16_t base;          /* base of free region */
+                       __uint16_t size;          /* length of free region */
+               } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
+       } hdr;                          /* first member of the leaf block */
+       struct xfs_attr_leaf_entry {    /* sorted on key, not name */
+               xfs_dahash_t hashval;   /* hash value of name */
+               __uint16_t nameidx;     /* index into buffer of name/value */
+               __uint8_t flags;        /* LOCAL, ROOT and INCOMPLETE flags */
+               __uint8_t pad2;         /* unused pad byte */
+       } entries[1];                   /* variable sized array */
+       struct xfs_attr_leaf_name_local {       /* name and value bytes both held here */
+               __uint16_t valuelen;    /* number of bytes in value */
+               __uint8_t namelen;      /* length of name bytes */
+               __uint8_t nameval[1];   /* name/value bytes */
+       } namelist;                     /* grows from bottom of buf */
+       struct xfs_attr_leaf_name_remote {      /* only the name is held here */
+               xfs_dablk_t valueblk;   /* block number of value bytes */
+               __uint32_t valuelen;    /* number of bytes in value */
+               __uint8_t namelen;      /* length of name bytes */
+               __uint8_t name[1];      /* name bytes */
+       } valuelist;                    /* grows from bottom of buf */
+} xfs_attr_leafblock_t;
+typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
+typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
+typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
+typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
+typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
+
+/*
+ * Flags used in the leaf_entry[i].flags field.
+ * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
+ * on the system call, they are "or"ed together for various operations.
+ */
+#define        XFS_ATTR_LOCAL_BIT      0       /* attr is stored locally */
+#define        XFS_ATTR_ROOT_BIT       1       /* limit access to attr to userid 0 */
+#define        XFS_ATTR_INCOMPLETE_BIT 7       /* attr in middle of create/delete */
+#define XFS_ATTR_LOCAL         (1 << XFS_ATTR_LOCAL_BIT)
+#define XFS_ATTR_ROOT          (1 << XFS_ATTR_ROOT_BIT)
+#define XFS_ATTR_INCOMPLETE    (1 << XFS_ATTR_INCOMPLETE_BIT)
+
+/*
+ * Alignment for namelist and valuelist entries (since they are mixed
+ * there can be only one alignment value)
+ */
+#define        XFS_ATTR_LEAF_NAME_ALIGN        ((uint)sizeof(xfs_dablk_t))
+
+/*
+ * Cast typed pointers for "local" and "remote" name/value structs.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_REMOTE)
+xfs_attr_leaf_name_remote_t *
+xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)   /* out-of-line version */ \
+       xfs_attr_leaf_name_remote(leafp,idx)
+#else
+#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx)   /* entries[idx].nameidx is a byte offset from leafp */ \
+       ((xfs_attr_leaf_name_remote_t *)                \
+        ((char *)(leafp) + INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_LOCAL)
+xfs_attr_leaf_name_local_t *
+xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)    /* out-of-line version */ \
+       xfs_attr_leaf_name_local(leafp,idx)
+#else
+#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx)    /* entries[idx].nameidx is a byte offset from leafp */ \
+       ((xfs_attr_leaf_name_local_t *)         \
+        ((char *)(leafp) + INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME)
+char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx);
+#define XFS_ATTR_LEAF_NAME(leafp,idx)          xfs_attr_leaf_name(leafp,idx) /* out-of-line version */
+#else
+#define XFS_ATTR_LEAF_NAME(leafp,idx)          /* untyped byte pointer at offset entries[idx].nameidx */ \
+       ((char *)(leafp) + INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT))
+#endif
+
+/*
+ * Calculate total bytes used (including trailing pad for alignment) for
+ * a "local" name/value structure and a "remote" name/value structure,
+ * and give the upper size limit for a "local" structure in a block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_REMOTE)
+int xfs_attr_leaf_entsize_remote(int nlen);
+#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)     \
+       xfs_attr_leaf_entsize_remote(nlen)
+#else
+#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen)     /* space for remote struct; the - 1 presumably discounts the name[1] placeholder; mask rounds up to XFS_ATTR_LEAF_NAME_ALIGN (assumes a power of two) */ \
+       (((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
+         XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL)
+int xfs_attr_leaf_entsize_local(int nlen, int vlen);
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen) \
+       xfs_attr_leaf_entsize_local(nlen,vlen)
+#else
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen) /* space for local struct; the - 1 presumably discounts the nameval[1] placeholder; mask rounds up to XFS_ATTR_LEAF_NAME_ALIGN (assumes a power of two) */ \
+       (((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + \
+         XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX)
+int xfs_attr_leaf_entsize_local_max(int bsize);
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize) \
+       xfs_attr_leaf_entsize_local_max(bsize)
+#else
+#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize) /* max local struct size: bsize/2 + bsize/4, each shift truncating */ \
+       (((bsize) >> 1) + ((bsize) >> 2))
+#endif
+
+
+/*========================================================================
+ * Structure used to pass context around among the routines.
+ *========================================================================*/
+
+typedef struct xfs_attr_list_context {
+       struct xfs_inode                *dp;    /* inode */
+       struct attrlist_cursor_kern     *cursor;/* position in list */
+       struct attrlist                 *alist; /* output buffer */
+       int                             count;  /* num used entries */
+       int                             dupcnt; /* count dup hashvals seen */
+       int                             bufsize;/* total buffer size */
+       int                             firstu; /* first used byte in buffer */
+       int                             flags;  /* from VOP call */
+       int                             resynch;/* T/F: resynch with cursor */
+} xfs_attr_list_context_t;
+
+/*
+ * Used to keep a list of "remote value" extents when unlinking an inode.
+ */
+typedef struct xfs_attr_inactive_list {
+       xfs_dablk_t     valueblk;       /* block number of value bytes */
+       int             valuelen;       /* number of bytes in value */
+} xfs_attr_inactive_list_t;
+
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Internal routines when attribute fork size < XFS_LITINO(mp).
+ */
+int    xfs_attr_shortform_create(struct xfs_da_args *args);
+int    xfs_attr_shortform_add(struct xfs_da_args *add);
+int    xfs_attr_shortform_lookup(struct xfs_da_args *args);
+int    xfs_attr_shortform_getvalue(struct xfs_da_args *args);
+int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int    xfs_attr_shortform_remove(struct xfs_da_args *remove);
+int    xfs_attr_shortform_list(struct xfs_attr_list_context *context);
+int    xfs_attr_shortform_replace(struct xfs_da_args *args);
+int    xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+
+/*
+ * Internal routines when attribute fork size == XFS_LBSIZE(mp).
+ */
+int    xfs_attr_leaf_to_node(struct xfs_da_args *args);
+int    xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
+                                         struct xfs_da_args *args);
+int    xfs_attr_leaf_clearflag(struct xfs_da_args *args);
+int    xfs_attr_leaf_setflag(struct xfs_da_args *args);
+int    xfs_attr_leaf_flipflags(xfs_da_args_t *args);
+
+/*
+ * Routines used for growing the Btree.
+ */
+int    xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
+                                   struct xfs_dabuf **bpp);
+int    xfs_attr_leaf_split(struct xfs_da_state *state,
+                                  struct xfs_da_state_blk *oldblk,
+                                  struct xfs_da_state_blk *newblk);
+int    xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf,
+                                       struct xfs_da_args *args);
+int    xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args);
+int    xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer,
+                                struct xfs_da_args *args);
+int    xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer,
+                                   struct xfs_da_args *args);
+int    xfs_attr_leaf_list_int(struct xfs_dabuf *bp,
+                                     struct xfs_attr_list_context *context);
+
+/*
+ * Routines used for shrinking the Btree.
+ */
+int    xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval);
+void   xfs_attr_leaf_unbalance(struct xfs_da_state *state,
+                                      struct xfs_da_state_blk *drop_blk,
+                                      struct xfs_da_state_blk *save_blk);
+int    xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
+int    xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
+                                     struct xfs_dabuf *bp, int level);
+int    xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp,
+                                     struct xfs_dabuf *bp);
+int    xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp,
+                                      xfs_dablk_t blkno, int blkcnt);
+
+/*
+ * Utility routines.
+ */
+xfs_dahash_t   xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
+int    xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
+                                  struct xfs_dabuf *leaf2_bp);
+int    xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize,
+                                       int *local);
+int    xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index);
+int    xfs_attr_put_listent(struct xfs_attr_list_context *context,
+                                   char *name, int namelen, int valuelen);
+int    xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp);
+
+#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/include/xfs_attr_sf.h b/include/xfs_attr_sf.h
new file mode 100644 (file)
index 0000000..c5106f8
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_ATTR_SF_H__
+#define        __XFS_ATTR_SF_H__
+
+/*
+ * Attribute storage when stored inside the inode.
+ *
+ * Small attribute lists are packed as tightly as possible so as
+ * to fit into the literal area of the inode.
+ */
+
+struct xfs_inode;
+
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+       struct xfs_attr_sf_hdr {        /* constant-structure header block */
+               __uint16_t totsize;     /* total bytes in shortform list */
+               __uint8_t count;        /* count of active entries */
+       } hdr;
+       struct xfs_attr_sf_entry {      /* one name/value entry; nameval[] runs past the struct */
+               __uint8_t namelen;      /* actual length of name (no NUL) */
+               __uint8_t valuelen;     /* actual length of value (no NUL) */
+               __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
+               __uint8_t nameval[1];   /* name & value bytes concatenated */
+       } list[1];                      /* variable sized array */
+} xfs_attr_shortform_t;
+typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
+typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
+
+/*
+ * We generate this then sort it; attr_list() must return things in hash-order.
+ */
+typedef struct xfs_attr_sf_sort {
+       __uint8_t       entno;          /* entry number in original list */
+       __uint8_t       namelen;        /* length of name value (no null) */
+       __uint8_t       valuelen;       /* length of value */
+       xfs_dahash_t    hash;           /* this entry's hash value */
+       char            *name;          /* name value, pointer into buffer */
+} xfs_attr_sf_sort_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE_BYNAME)
+int xfs_attr_sf_entsize_byname(int nlen, int vlen);
+#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)  \
+       xfs_attr_sf_entsize_byname(nlen,vlen)
+#else
+#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)  /* space name/value uses; the -1 drops the nameval[1] placeholder byte */ \
+       ((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen))
+#endif
+#define XFS_ATTR_SF_ENTSIZE_MAX                        /* max space for name&value: largest value a __uint8_t can hold */ \
+       ((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE)
+int xfs_attr_sf_entsize(xfs_attr_sf_entry_t *sfep);
+#define XFS_ATTR_SF_ENTSIZE(sfep)      xfs_attr_sf_entsize(sfep)
+#else
+#define XFS_ATTR_SF_ENTSIZE(sfep)              /* space an entry uses; evaluates sfep twice */ \
+       ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_NEXTENTRY)
+xfs_attr_sf_entry_t *xfs_attr_sf_nextentry(xfs_attr_sf_entry_t *sfep);
+#define XFS_ATTR_SF_NEXTENTRY(sfep)    xfs_attr_sf_nextentry(sfep)
+#else
+#define XFS_ATTR_SF_NEXTENTRY(sfep)            /* next entry in struct: step XFS_ATTR_SF_ENTSIZE(sfep) bytes past sfep */ \
+       ((xfs_attr_sf_entry_t *) \
+               ((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_TOTSIZE)
+int xfs_attr_sf_totsize(struct xfs_inode *dp);
+#define XFS_ATTR_SF_TOTSIZE(dp)                xfs_attr_sf_totsize(dp)
+#else
+#define XFS_ATTR_SF_TOTSIZE(dp)                        /* total space in use: hdr.totsize read from dp's i_afp data */ \
+       (INT_GET(((xfs_attr_shortform_t *)((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT))
+#endif
+
+#ifdef XFS_ALL_TRACE
+#define        XFS_ATTR_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_ATTR_TRACE
+#endif
+
+/*
+ * Kernel tracing support for attribute lists
+ */
+struct xfs_attr_list_context;
+struct xfs_da_intnode;
+struct xfs_da_node_entry;
+struct xfs_attr_leafblock;
+
+#define        XFS_ATTR_TRACE_SIZE     4096    /* size of global trace buffer */     
+
+/*
+ * Trace record types.
+ */
+#define        XFS_ATTR_KTRACE_L_C     1       /* context */
+#define        XFS_ATTR_KTRACE_L_CN    2       /* context, node */
+#define        XFS_ATTR_KTRACE_L_CB    3       /* context, btree */
+#define        XFS_ATTR_KTRACE_L_CL    4       /* context, leaf */
+
+#if defined(XFS_ATTR_TRACE)
+
+void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context);
+void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
+                             struct xfs_da_intnode *node);
+void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
+                             struct xfs_da_node_entry *btree);
+void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
+                             struct xfs_attr_leafblock *leaf);
+void xfs_attr_trace_enter(int type, char *where,
+                            __psunsigned_t a2, __psunsigned_t a3,
+                            __psunsigned_t a4, __psunsigned_t a5,
+                            __psunsigned_t a6, __psunsigned_t a7,
+                            __psunsigned_t a8, __psunsigned_t a9,
+                            __psunsigned_t a10, __psunsigned_t a11,
+                            __psunsigned_t a12, __psunsigned_t a13,
+                            __psunsigned_t a14, __psunsigned_t a15);
+#else
+#define        xfs_attr_trace_l_c(w,c)
+#define        xfs_attr_trace_l_cn(w,c,n)
+#define        xfs_attr_trace_l_cb(w,c,b)
+#define        xfs_attr_trace_l_cl(w,c,l)
+#endif /* XFS_ATTR_TRACE */
+
+#endif /* __XFS_ATTR_SF_H__ */
diff --git a/include/xfs_bit.h b/include/xfs_bit.h
new file mode 100644 (file)
index 0000000..80eccc5
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BIT_H__
+#define        __XFS_BIT_H__
+
+/*
+ * XFS bit manipulation routines.
+ */
+
+/*
+ * masks with n high/low bits set, 32-bit values & 64-bit values
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32HI)
+__uint32_t xfs_mask32hi(int n);
+#define        XFS_MASK32HI(n)         xfs_mask32hi(n)
+#else
+#define        XFS_MASK32HI(n)         ((__uint32_t)-1 << (32 - (n)))  /* n in 1..32; n == 0 would shift by 32, which is undefined */
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64HI)
+__uint64_t xfs_mask64hi(int n);
+#define        XFS_MASK64HI(n)         xfs_mask64hi(n)
+#else
+#define        XFS_MASK64HI(n)         ((__uint64_t)-1 << (64 - (n)))  /* n in 1..64; n == 0 would shift by 64, which is undefined */
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32LO)
+__uint32_t xfs_mask32lo(int n);
+#define        XFS_MASK32LO(n)         xfs_mask32lo(n)
+#else
+#define        XFS_MASK32LO(n)         (((__uint32_t)1 << (n)) - 1)    /* n in 0..31; n == 32 would shift by 32, which is undefined */
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64LO)
+__uint64_t xfs_mask64lo(int n);
+#define        XFS_MASK64LO(n)         xfs_mask64lo(n)
+#else
+#define        XFS_MASK64LO(n)         (((__uint64_t)1 << (n)) - 1)    /* n in 0..63; n == 64 would shift by 64, which is undefined */
+#endif
+
+/*
+ * Index of low bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+extern const char xfs_lowbit[256];     /* NOTE(review): a -1 entry needs char to be signed, which is ABI-dependent -- confirm */
+
+/*
+ * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
+ */
+extern const char xfs_highbit[256];    /* NOTE(review): a -1 entry needs char to be signed, which is ABI-dependent -- confirm */
+
+/*
+ * Count of bits set in byte, 0..8.
+ */
+extern const char xfs_countbit[256];
+
+/*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+extern int xfs_lowbit32(__uint32_t v);
+
+/*
+ * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
+ */
+extern int xfs_highbit32(__uint32_t v);
+
+/*
+ * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
+ */
+extern int xfs_lowbit64(__uint64_t v);
+
+/*
+ * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
+ */
+extern int xfs_highbit64(__uint64_t v);
+
+#endif /* __XFS_BIT_H__ */
diff --git a/include/xfs_bmap.h b/include/xfs_bmap.h
new file mode 100644 (file)
index 0000000..fa1a9e4
--- /dev/null
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BMAP_H__
+#define        __XFS_BMAP_H__
+
+struct getbmap;
+struct xfs_bmbt_irec;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * List of extents to be freed "later".
+ * The list is kept sorted on xbfi_startblock.
+ */
+typedef struct xfs_bmap_free_item
+{
+       xfs_fsblock_t           xbfi_startblock;/* starting fs block number */
+       xfs_extlen_t            xbfi_blockcount;/* number of blocks in extent */
+       struct xfs_bmap_free_item *xbfi_next;   /* link to next entry */
+} xfs_bmap_free_item_t;
+
+/*
+ * Header for free extent list.
+ */
+typedef        struct xfs_bmap_free
+{
+       xfs_bmap_free_item_t    *xbf_first;     /* list of to-be-free extents */
+       int                     xbf_count;      /* count of items on list */
+       int                     xbf_low;        /* kludge: alloc in low mode */
+} xfs_bmap_free_t;
+
+#define        XFS_BMAP_MAX_NMAP       4
+
+/*
+ * Flags for xfs_bmapi
+ */
+#define        XFS_BMAPI_WRITE         0x001   /* write operation: allocate space */
+#define XFS_BMAPI_DELAY                0x002   /* delayed write operation */
+#define XFS_BMAPI_ENTIRE       0x004   /* return entire extent, not trimmed */
+#define XFS_BMAPI_METADATA     0x008   /* mapping metadata not user data */
+#define XFS_BMAPI_EXACT                0x010   /* allocate only to spec'd bounds */
+#define XFS_BMAPI_ATTRFORK     0x020   /* use attribute fork not data */
+#define XFS_BMAPI_ASYNC                0x040   /* bunmapi xactions can be async */
+#define XFS_BMAPI_RSVBLOCKS    0x080   /* OK to alloc. reserved data blocks */
+#define        XFS_BMAPI_PREALLOC      0x100   /* preallocation op: unwritten space */
+#define        XFS_BMAPI_IGSTATE       0x200   /* Ignore state - */
+                                       /* combine contig. space */
+#define        XFS_BMAPI_CONTIG        0x400   /* must allocate only one extent */
+#define XFS_BMAPI_DIRECT_IO    0x800   /* Flag from cxfs client, not used
+                                        * by xfs directly. Indicates alloc
+                                        * request is for direct I/O not
+                                        * extent conversion by server */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAPI_AFLAG)
+int xfs_bmapi_aflag(int w);
+#define        XFS_BMAPI_AFLAG(w)      xfs_bmapi_aflag(w)
+#else
+#define        XFS_BMAPI_AFLAG(w)      ((w) == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0)
+#endif
+
+/*
+ * Special values for xfs_bmbt_irec_t br_startblock field.
+ */
+#define        DELAYSTARTBLOCK         ((xfs_fsblock_t)-1LL)
+#define        HOLESTARTBLOCK          ((xfs_fsblock_t)-2LL)
+
+/*
+ * Trace operations for bmap extent tracing
+ */
+#define        XFS_BMAP_KTRACE_DELETE  1
+#define        XFS_BMAP_KTRACE_INSERT  2
+#define        XFS_BMAP_KTRACE_PRE_UP  3
+#define        XFS_BMAP_KTRACE_POST_UP 4
+
+#define        XFS_BMAP_TRACE_SIZE     4096    /* size of global trace buffer */
+#define        XFS_BMAP_KTRACE_SIZE    32      /* size of per-inode trace buffer */
+
+#if defined(XFS_ALL_TRACE)
+#define        XFS_BMAP_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_BMAP_TRACE
+#endif
+
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_INIT)
+void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp);
+#define        XFS_BMAP_INIT(flp,fbp)  xfs_bmap_init(flp,fbp)
+#else
+#define        XFS_BMAP_INIT(flp,fbp)  \
+       ((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \
+        (flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK)
+#endif
+
+/*
+ * Argument structure for xfs_bmap_alloc.
+ */
+typedef struct xfs_bmalloca {
+       xfs_fsblock_t           firstblock; /* i/o first block allocated */
+       xfs_fsblock_t           rval;   /* starting block of new extent */
+       xfs_fileoff_t           off;    /* offset in file filling in */
+       struct xfs_trans        *tp;    /* transaction pointer */
+       struct xfs_inode        *ip;    /* incore inode pointer */
+       struct xfs_bmbt_irec    *prevp; /* extent before the new one */
+       struct xfs_bmbt_irec    *gotp;  /* extent after, or delayed */
+       xfs_extlen_t            alen;   /* i/o length asked/allocated */
+       xfs_extlen_t            total;  /* total blocks needed for xaction */
+       xfs_extlen_t            minlen; /* minimum allocation size (blocks) */
+       xfs_extlen_t            minleft; /* amount must be left after alloc */
+       int                     eof;    /* set if allocating past last extent */
+       int                     wasdel; /* replacing a delayed allocation */
+       int                     userdata;/* set if is user data */
+       int                     low;    /* low on space, using seq'l ags */
+       int                     aeof;   /* allocated space at eof */
+} xfs_bmalloca_t;
+
+#ifdef __KERNEL__
+/*
+ * Convert inode from non-attributed to attributed.
+ * Must not be in a transaction, ip must not be locked.
+ */
+int                                    /* error code */
+xfs_bmap_add_attrfork(
+       struct xfs_inode        *ip,    /* incore inode pointer */
+       int                                     rsvd);  /* flag for reserved block allocation */
+
+/*
+ * Add the extent to the list of extents to be freed at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+void
+xfs_bmap_add_free(
+       xfs_fsblock_t           bno,            /* fs block number of extent */
+       xfs_filblks_t           len,            /* length of extent */
+       xfs_bmap_free_t         *flist,         /* list of extents */
+       struct xfs_mount        *mp);           /* mount point structure */
+
+/*
+ * Routine to clean up the free list data structure when
+ * an error occurs during a transaction.
+ */
+void
+xfs_bmap_cancel(
+       xfs_bmap_free_t         *flist);        /* free list to clean up */
+
+/*
+ * Routine to check if a specified inode is swap capable.
+ */
+int
+xfs_bmap_check_swappable(
+       struct xfs_inode        *ip);           /* incore inode */
+
+/* 
+ * Compute and fill in the value of the maximum depth of a bmap btree
+ * in this filesystem.  Done once, during mount.
+ */
+void
+xfs_bmap_compute_maxlevels(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       int                     whichfork);     /* data or attr fork */
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 
+ * caller.  Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.
+ *
+ * Sets *committed to 1 if the given transaction was committed and a new one
+ * allocated, and to 0 otherwise; the return value is an error code.
+ */
+int                                            /* error */
+xfs_bmap_finish(
+       struct xfs_trans        **tp,           /* transaction pointer addr */
+       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
+       xfs_fsblock_t           firstblock,     /* controlled a.g. for allocs */
+       int                     *committed);    /* xact committed or not */
+
+/*
+ * Returns the file-relative block number of the first unused block in the file.
+ * This is the lowest-address hole if the file has holes, else the first block
+ * past the end of file.
+ */
+int                                            /* error */
+xfs_bmap_first_unused(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_extlen_t            len,            /* size of hole to find */
+       xfs_fileoff_t           *unused,        /* unused block num */
+       int                     whichfork);     /* data or attr fork */
+
+/*
+ * Returns the file-relative block number of the last block + 1 before
+ * last_block (input value) in the file.
+ * This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int                                            /* error */
+xfs_bmap_last_before(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           *last_block,    /* last block */
+       int                     whichfork);     /* data or attr fork */
+
+/*
+ * Returns the file-relative block number of the first block past eof in
+ * the file.  This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int                                            /* error */
+xfs_bmap_last_offset(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           *unused,        /* last block num */
+       int                     whichfork);     /* data or attr fork */
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not.  For the data fork we check this matches di_size,
+ * implying the file's range is 0..bsize-1.
+ */
+int
+xfs_bmap_one_block(
+       struct xfs_inode        *ip,            /* incore inode */
+       int                     whichfork);     /* data or attr fork */
+
+/*
+ * Read in the extents to iu_extents.
+ * All inode fields are set up by caller, we just traverse the btree
+ * and copy the records in.
+ */
+int                                            /* error */
+xfs_bmap_read_extents(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       int                     whichfork);     /* data or attr fork */
+
+#if defined(XFS_BMAP_TRACE)
+/*
+ * Add bmap trace insert entries for all the contents of the extent list.
+ */
+void
+xfs_bmap_trace_exlist(
+       char                    *fname,         /* function name */
+       struct xfs_inode        *ip,            /* incore inode pointer */
+       xfs_extnum_t            cnt,            /* count of entries in list */
+       int                     whichfork);     /* data or attr fork */
+#else
+#define        xfs_bmap_trace_exlist(f,ip,c,w)
+#endif
+
+/*
+ * Map file blocks to filesystem blocks.
+ * File range is given by the bno/len pair.
+ * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
+ * into a hole or past eof.
+ * Only allocates blocks from a single allocation group,
+ * to avoid locking problems.
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). 
+ */
+int                                            /* error */
+xfs_bmapi(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting file offs. mapped */
+       xfs_filblks_t           len,            /* length to map in file */
+       int                     flags,          /* XFS_BMAPI_... */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_extlen_t            total,          /* total blocks needed */
+       struct xfs_bmbt_irec    *mval,          /* output: map values */
+       int                     *nmap,          /* i/o: mval size/count */
+       xfs_bmap_free_t         *flist);        /* i/o: list extents to free */
+
+/*
+ * Map file blocks to filesystem blocks, simple version.
+ * One block only, read-only.
+ * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
+ * For the other flag values, the effect is as if XFS_BMAPI_METADATA
+ * was set and all the others were clear.
+ */
+int                                            /* error */
+xfs_bmapi_single(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       int                     whichfork,      /* data or attr fork */
+       xfs_fsblock_t           *fsb,           /* output: mapped block */
+       xfs_fileoff_t           bno);           /* starting file offs. mapped */
+
+/*
+ * Unmap (remove) blocks from a file.
+ * If nexts is nonzero then the number of extents to remove is limited to
+ * that value.  If not all extents in the block range can be removed then
+ * *done is set.
+ */
+int                                            /* error */
+xfs_bunmapi(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting offset to unmap */
+       xfs_filblks_t           len,            /* length to unmap in file */
+       int                     flags,          /* XFS_BMAPI_... */
+       xfs_extnum_t            nexts,          /* number of extents max */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
+       int                     *done);         /* set if not done yet */
+
+/*
+ * Fcntl interface to xfs_bmapi.
+ */
+int                                            /* error code */
+xfs_getbmap(
+       bhv_desc_t              *bdp,           /* XFS behavior descriptor*/
+       struct getbmap          *bmv,           /* user bmap structure */
+       void                    *ap,            /* pointer to user's array */
+       int                     iflags);        /* interface flags */
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ */
+int
+xfs_bmap_isaeof(
+        struct xfs_inode       *ip,
+        xfs_fileoff_t          off,
+        int                    whichfork,
+        int                    *aeof);
+
+/*
+ * Check if the endoff is outside the last extent. If so the caller will grow 
+ * the allocation to a stripe unit boundary
+ */
+int
+xfs_bmap_eof(
+        struct xfs_inode        *ip,
+        xfs_fileoff_t           endoff,
+        int                     whichfork,
+        int                     *eof);
+
+/*
+ * Count fsblocks of the given fork.
+ */
+int
+xfs_bmap_count_blocks(
+       xfs_trans_t             *tp,
+       xfs_inode_t             *ip,
+       int                     whichfork,
+       int                     *count);
+
+/*
+ * Check an extent list, which has just been read, for
+ * any bit in the extent flag field.
+ */
+int
+xfs_check_nostate_extents(
+       xfs_bmbt_rec_t          *ep,
+       xfs_extnum_t            num);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_BMAP_H__ */
diff --git a/include/xfs_bmap_btree.h b/include/xfs_bmap_btree.h
new file mode 100644 (file)
index 0000000..af8ac67
--- /dev/null
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BMAP_BTREE_H__
+#define        __XFS_BMAP_BTREE_H__
+
+#define        XFS_BMAP_MAGIC  0x424d4150      /* 'BMAP' */
+
+struct xfs_btree_cur;
+struct xfs_btree_lblock;
+struct xfs_mount;
+struct xfs_inode;
+
+/*
+ * Bmap root header, on-disk form only.
+ */
+typedef struct xfs_bmdr_block
+{
+       __uint16_t      bb_level;       /* 0 is a leaf */
+       __uint16_t      bb_numrecs;     /* current # of data records */
+} xfs_bmdr_block_t;
+
+/*
+ * Bmap btree record and extent descriptor.
+ * For 32-bit kernels,
+ *  l0:31 is an extent flag (value 1 indicates non-normal).
+ *  l0:0-30 and l1:9-31 are startoff.
+ *  l1:0-8, l2:0-31, and l3:21-31 are startblock.
+ *  l3:0-20 are blockcount.
+ * For 64-bit kernels,
+ *  l0:63 is an extent flag (value 1 indicates non-normal).
+ *  l0:9-62 are startoff.
+ *  l0:0-8 and l1:21-63 are startblock.
+ *  l1:0-20 are blockcount.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+#define        BMBT_TOTAL_BITLEN       128     /* 128 bits, 16 bytes */
+#define        BMBT_EXNTFLAG_BITOFF    0
+#define        BMBT_EXNTFLAG_BITLEN    1
+#define        BMBT_STARTOFF_BITOFF    (BMBT_EXNTFLAG_BITOFF + BMBT_EXNTFLAG_BITLEN)
+#define        BMBT_STARTOFF_BITLEN    54
+#define        BMBT_STARTBLOCK_BITOFF  (BMBT_STARTOFF_BITOFF + BMBT_STARTOFF_BITLEN)
+#define        BMBT_STARTBLOCK_BITLEN  52
+#define        BMBT_BLOCKCOUNT_BITOFF  \
+       (BMBT_STARTBLOCK_BITOFF + BMBT_STARTBLOCK_BITLEN)
+#define        BMBT_BLOCKCOUNT_BITLEN  (BMBT_TOTAL_BITLEN - BMBT_BLOCKCOUNT_BITOFF)
+
+#else
+
+#define        BMBT_TOTAL_BITLEN       128     /* 128 bits, 16 bytes */
+#define        BMBT_EXNTFLAG_BITOFF    63
+#define        BMBT_EXNTFLAG_BITLEN    1
+#define        BMBT_STARTOFF_BITOFF    (BMBT_EXNTFLAG_BITOFF - BMBT_STARTOFF_BITLEN)
+#define        BMBT_STARTOFF_BITLEN    54
+#define        BMBT_STARTBLOCK_BITOFF  85 /* 128 - 43 (other 9 is in first word) */
+#define        BMBT_STARTBLOCK_BITLEN  52
+#define        BMBT_BLOCKCOUNT_BITOFF  64 /* Start of second 64 bit container */
+#define        BMBT_BLOCKCOUNT_BITLEN  21
+
+#endif
+
+
+#define        BMBT_USE_64     1
+
+typedef struct xfs_bmbt_rec_32
+{
+       __uint32_t              l0, l1, l2, l3;
+} xfs_bmbt_rec_32_t;
+typedef struct xfs_bmbt_rec_64
+{
+       __uint64_t              l0, l1;
+} xfs_bmbt_rec_64_t;
+
+#if BMBT_USE_64
+typedef        __uint64_t      xfs_bmbt_rec_base_t;    /* use this for casts */
+typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
+#else  /* !BMBT_USE_64 */
+typedef        __uint32_t      xfs_bmbt_rec_base_t;    /* use this for casts */
+typedef xfs_bmbt_rec_32_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
+#endif /* BMBT_USE_64 */
+
+/*
+ * Values and macros for delayed-allocation startblock fields.
+ */
+#define        STARTBLOCKVALBITS       17
+#define        STARTBLOCKMASKBITS      (15 + XFS_BIG_FILESYSTEMS * 20)
+#define        DSTARTBLOCKMASKBITS     (15 + 20)
+#define        STARTBLOCKMASK          \
+       (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+#define        DSTARTBLOCKMASK         \
+       (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLSTARTBLOCK)
+int isnullstartblock(xfs_fsblock_t x);
+#define        ISNULLSTARTBLOCK(x)     isnullstartblock(x)
+#else
+#define        ISNULLSTARTBLOCK(x)     (((x) & STARTBLOCKMASK) == STARTBLOCKMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLDSTARTBLOCK)
+int isnulldstartblock(xfs_dfsbno_t x);
+#define        ISNULLDSTARTBLOCK(x)    isnulldstartblock(x)
+#else
+#define        ISNULLDSTARTBLOCK(x)    (((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_NULLSTARTBLOCK)
+xfs_fsblock_t nullstartblock(int k);
+#define        NULLSTARTBLOCK(k)       nullstartblock(k)
+#else  /* macro form: k appears twice in the expansion, so keep it side-effect free */
+#define        NULLSTARTBLOCK(k)       \
+       ((ASSERT((k) < (1 << STARTBLOCKVALBITS))), (STARTBLOCKMASK | (k)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_STARTBLOCKVAL)
+xfs_filblks_t startblockval(xfs_fsblock_t x);
+#define        STARTBLOCKVAL(x)        startblockval(x)
+#else
+#define        STARTBLOCKVAL(x)        ((xfs_filblks_t)((x) & ~STARTBLOCKMASK))
+#endif
+
+/*
+ * Possible extent formats.
+ */
+typedef        enum {
+       XFS_EXTFMT_NOSTATE = 0,
+       XFS_EXTFMT_HASSTATE
+} xfs_exntfmt_t;
+
+/*
+ * Possible extent states.
+ */
+typedef        enum {
+       XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
+       XFS_EXT_DMAPI_OFFLINE
+} xfs_exntst_t;
+
+/*
+ * Extent state and extent format macros.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTFMT_INODE )
+xfs_exntfmt_t xfs_extfmt_inode(struct xfs_inode *ip);
+#define        XFS_EXTFMT_INODE(x)     xfs_extfmt_inode(x)
+#else
+#define        XFS_EXTFMT_INODE(x) \
+  (XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
+       XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
+#endif
+#define        ISUNWRITTEN(x)          ((x) == XFS_EXT_UNWRITTEN)
+
+/*
+ * Incore version of the bmap btree extent record (xfs_bmbt_rec_t).
+ */
+typedef struct xfs_bmbt_irec
+{
+       xfs_fileoff_t   br_startoff;    /* starting file offset */
+       xfs_fsblock_t   br_startblock;  /* starting block number */
+       xfs_filblks_t   br_blockcount;  /* number of blocks */
+       xfs_exntst_t    br_state;       /* extent state */
+} xfs_bmbt_irec_t;
+
+/*
+ * Key structure for non-leaf levels of the tree.
+ */
+typedef struct xfs_bmbt_key
+{
+       xfs_dfiloff_t   br_startoff;    /* starting file offset */
+} xfs_bmbt_key_t, xfs_bmdr_key_t;
+
+typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;   /* btree pointer type */
+                                       /* btree block header type */
+typedef        struct xfs_btree_lblock xfs_bmbt_block_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BMBT_BLOCK)
+xfs_bmbt_block_t *xfs_buf_to_bmbt_block(struct xfs_buf *bp);
+#define        XFS_BUF_TO_BMBT_BLOCK(bp)               xfs_buf_to_bmbt_block(bp)
+#else
+#define        XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_DSIZE)
+int xfs_bmap_rblock_dsize(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_RBLOCK_DSIZE(lev,cur)          xfs_bmap_rblock_dsize(lev,cur)
+#else
+#define        XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_ISIZE)
+int xfs_bmap_rblock_isize(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_RBLOCK_ISIZE(lev,cur)          xfs_bmap_rblock_isize(lev,cur)
+#else
+#define        XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
+       ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
+                           (cur)->bc_private.b.whichfork)->if_broot_bytes)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_IBLOCK_SIZE)
+int xfs_bmap_iblock_size(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_IBLOCK_SIZE(lev,cur)           xfs_bmap_iblock_size(lev,cur)
+#else
+#define        XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DSIZE)
+int xfs_bmap_block_dsize(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_DSIZE(lev,cur)           xfs_bmap_block_dsize(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_DSIZE(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BMAP_RBLOCK_DSIZE(lev,cur) : \
+               XFS_BMAP_IBLOCK_SIZE(lev,cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_ISIZE)
+int xfs_bmap_block_isize(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_ISIZE(lev,cur)           xfs_bmap_block_isize(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_ISIZE(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BMAP_RBLOCK_ISIZE(lev,cur) : \
+               XFS_BMAP_IBLOCK_SIZE(lev,cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMAXRECS)
+int xfs_bmap_block_dmaxrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_DMAXRECS(lev,cur)        xfs_bmap_block_dmaxrecs(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
+                       xfs_bmdr, (lev) == 0) : \
+               ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMAXRECS)
+int xfs_bmap_block_imaxrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_IMAXRECS(lev,cur)        xfs_bmap_block_imaxrecs(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
+                       xfs_bmbt, (lev) == 0) : \
+               ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMINRECS)
+int xfs_bmap_block_dminrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_DMINRECS(lev,cur)        xfs_bmap_block_dminrecs(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_DMINRECS(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
+                       xfs_bmdr, (lev) == 0) : \
+               ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMINRECS)
+int xfs_bmap_block_iminrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_BLOCK_IMINRECS(lev,cur)        xfs_bmap_block_iminrecs(lev,cur)
+#else
+#define        XFS_BMAP_BLOCK_IMINRECS(lev,cur) \
+       ((lev) == (cur)->bc_nlevels - 1 ? \
+               XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \
+                       xfs_bmbt, (lev) == 0) : \
+               ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_DADDR)
+xfs_bmbt_rec_t *
+xfs_bmap_rec_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_REC_DADDR(bb,i,cur)            xfs_bmap_rec_daddr(bb,i,cur)
+#else
+#define        XFS_BMAP_REC_DADDR(bb,i,cur) \
+       XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_IADDR)
+xfs_bmbt_rec_t *
+xfs_bmap_rec_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_REC_IADDR(bb,i,cur)            xfs_bmap_rec_iaddr(bb,i,cur)
+#else
+#define        XFS_BMAP_REC_IADDR(bb,i,cur) \
+       XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_DADDR)
+xfs_bmbt_key_t *
+xfs_bmap_key_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_KEY_DADDR(bb,i,cur)            xfs_bmap_key_daddr(bb,i,cur)
+#else
+#define        XFS_BMAP_KEY_DADDR(bb,i,cur) \
+       XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_IADDR)
+xfs_bmbt_key_t *
+xfs_bmap_key_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_KEY_IADDR(bb,i,cur)            xfs_bmap_key_iaddr(bb,i,cur)
+#else
+#define        XFS_BMAP_KEY_IADDR(bb,i,cur) \
+       XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_DADDR)
+xfs_bmbt_ptr_t *
+xfs_bmap_ptr_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_PTR_DADDR(bb,i,cur)            xfs_bmap_ptr_daddr(bb,i,cur)
+#else
+#define        XFS_BMAP_PTR_DADDR(bb,i,cur) \
+       XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_IADDR)
+xfs_bmbt_ptr_t *
+xfs_bmap_ptr_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_BMAP_PTR_IADDR(bb,i,cur)            xfs_bmap_ptr_iaddr(bb,i,cur)
+#else
+#define        XFS_BMAP_PTR_IADDR(bb,i,cur) \
+       XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE(                \
+               INT_GET((bb)->bb_level, ARCH_CONVERT), cur),    \
+               xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS(       \
+                       INT_GET((bb)->bb_level, ARCH_CONVERT), cur))
+#endif
+
+/*
+ * These are to be used when we know the size of the block and
+ * we don't have a cursor.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_REC_ADDR)
+xfs_bmbt_rec_t *xfs_bmap_broot_rec_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define        XFS_BMAP_BROOT_REC_ADDR(bb,i,sz)        xfs_bmap_broot_rec_addr(bb,i,sz)
+#else
+#define        XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
+       XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_KEY_ADDR)
+xfs_bmbt_key_t *xfs_bmap_broot_key_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define        XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz)        xfs_bmap_broot_key_addr(bb,i,sz)
+#else
+#define        XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
+       XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_PTR_ADDR)
+xfs_bmbt_ptr_t *xfs_bmap_broot_ptr_addr(xfs_bmbt_block_t *bb, int i, int sz);
+#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz)       xfs_bmap_broot_ptr_addr(bb,i,sz)
+#else
+#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
+       XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_NUMRECS)
+int xfs_bmap_broot_numrecs(xfs_bmdr_block_t *bb);
+#define        XFS_BMAP_BROOT_NUMRECS(bb)              xfs_bmap_broot_numrecs(bb)
+#else
+#define        XFS_BMAP_BROOT_NUMRECS(bb) (INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_MAXRECS)
+int xfs_bmap_broot_maxrecs(int sz);
+#define        XFS_BMAP_BROOT_MAXRECS(sz)              xfs_bmap_broot_maxrecs(sz)
+#else
+#define        XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE_CALC)
+int xfs_bmap_broot_space_calc(int nrecs);
+#define        XFS_BMAP_BROOT_SPACE_CALC(nrecs)        xfs_bmap_broot_space_calc(nrecs)
+#else
+#define        XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
+       ((int)(sizeof(xfs_bmbt_block_t) + \
+              ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE)
+int xfs_bmap_broot_space(xfs_bmdr_block_t *bb);
+#define        XFS_BMAP_BROOT_SPACE(bb)                xfs_bmap_broot_space(bb)
+#else
+#define        XFS_BMAP_BROOT_SPACE(bb) \
+       XFS_BMAP_BROOT_SPACE_CALC(INT_GET((bb)->bb_numrecs, ARCH_CONVERT))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMDR_SPACE_CALC)
+int xfs_bmdr_space_calc(int nrecs);
+#define        XFS_BMDR_SPACE_CALC(nrecs)              xfs_bmdr_space_calc(nrecs)
+#else
+#define        XFS_BMDR_SPACE_CALC(nrecs)      \
+       ((int)(sizeof(xfs_bmdr_block_t) + \
+              ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))))
+#endif
+
+/*
+ * Maximum number of bmap btree levels.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BM_MAXLEVELS)
+int xfs_bm_maxlevels(struct xfs_mount *mp, int w);
+#define        XFS_BM_MAXLEVELS(mp,w)                  xfs_bm_maxlevels(mp,w)
+#else
+#define        XFS_BM_MAXLEVELS(mp,w)          ((mp)->m_bm_maxlevels[w])
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_SANITY_CHECK)
+int xfs_bmap_sanity_check(struct xfs_mount *mp, xfs_bmbt_block_t *bb,
+       int level);
+#define        XFS_BMAP_SANITY_CHECK(mp,bb,level)      \
+       xfs_bmap_sanity_check(mp,bb,level)
+#else  /* macro form: bb and level are evaluated more than once */
+#define        XFS_BMAP_SANITY_CHECK(mp,bb,level)      \
+       (INT_GET((bb)->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC && \
+        INT_GET((bb)->bb_level, ARCH_CONVERT) == (level) && \
+        INT_GET((bb)->bb_numrecs, ARCH_CONVERT) > 0 && \
+        INT_GET((bb)->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0])
+#endif
+
+/*
+ * Trace buffer entry types.
+ */
+#define        XFS_BMBT_KTRACE_ARGBI   1
+#define        XFS_BMBT_KTRACE_ARGBII  2
+#define        XFS_BMBT_KTRACE_ARGFFFI 3
+#define        XFS_BMBT_KTRACE_ARGI    4
+#define        XFS_BMBT_KTRACE_ARGIFK  5
+#define        XFS_BMBT_KTRACE_ARGIFR  6
+#define        XFS_BMBT_KTRACE_ARGIK   7
+#define        XFS_BMBT_KTRACE_CUR     8
+
+#define        XFS_BMBT_TRACE_SIZE     4096    /* size of global trace buffer */     
+#define        XFS_BMBT_KTRACE_SIZE    32      /* size of per-inode trace buffer */
+
+#if defined(XFS_ALL_TRACE)
+#define        XFS_BMBT_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_BMBT_TRACE
+#endif
+
+
+/*
+ * Prototypes for xfs_bmap.c to call.
+ */
+
+void
+xfs_bmdr_to_bmbt(
+       xfs_bmdr_block_t *,
+       int,
+       xfs_bmbt_block_t *,
+       int);
+
+int
+xfs_bmbt_decrement(
+       struct xfs_btree_cur *,
+       int,
+       int *);
+
+int
+xfs_bmbt_delete(
+       struct xfs_btree_cur *,
+       int,
+       int *);        
+
+void
+xfs_bmbt_get_all(
+       xfs_bmbt_rec_t  *r,
+       xfs_bmbt_irec_t *s);
+
+xfs_bmbt_block_t *
+xfs_bmbt_get_block(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       struct xfs_buf          **bpp);
+
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+       xfs_bmbt_rec_t  *r);
+
+xfs_fsblock_t
+xfs_bmbt_get_startblock(
+       xfs_bmbt_rec_t  *r);
+
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+       xfs_bmbt_rec_t  *r);
+
+xfs_exntst_t
+xfs_bmbt_get_state(
+       xfs_bmbt_rec_t  *r);
+
+int
+xfs_bmbt_increment(
+       struct xfs_btree_cur *,
+       int,
+       int *);
+
+int
+xfs_bmbt_insert(
+       struct xfs_btree_cur *,
+       int *);        
+
+int
+xfs_bmbt_insert_many(
+       struct xfs_btree_cur *,
+       int,
+       xfs_bmbt_rec_t *,
+       int *);        
+
+void
+xfs_bmbt_log_block(
+       struct xfs_btree_cur *,
+       struct xfs_buf *,
+       int);
+
+void
+xfs_bmbt_log_recs(
+       struct xfs_btree_cur *,
+       struct xfs_buf *,
+       int,
+       int);
+
+int
+xfs_bmbt_lookup_eq(
+       struct xfs_btree_cur *,
+       xfs_fileoff_t,
+       xfs_fsblock_t,
+       xfs_filblks_t,
+       int *);
+
+int
+xfs_bmbt_lookup_ge(
+       struct xfs_btree_cur *,
+       xfs_fileoff_t,
+       xfs_fsblock_t,
+       xfs_filblks_t,
+       int *);
+
+int
+xfs_bmbt_lookup_le(
+       struct xfs_btree_cur *,
+       xfs_fileoff_t,
+       xfs_fsblock_t,
+       xfs_filblks_t,
+       int *);
+
+/*
+ * Give the bmap btree a new root block.  Copy the old broot contents
+ * down into a real block and make the broot point to it.
+ */
+int                                            /* error */
+xfs_bmbt_newroot(
+       struct xfs_btree_cur    *cur,           /* btree cursor */
+       int                     *logflags,      /* logging flags for inode */
+       int                     *stat);         /* return status - 0 fail */
+
+void
+xfs_bmbt_set_all(
+       xfs_bmbt_rec_t  *r,
+       xfs_bmbt_irec_t *s);
+
+void
+xfs_bmbt_set_allf(
+       xfs_bmbt_rec_t  *r,
+       xfs_fileoff_t   o,
+       xfs_fsblock_t   b,
+       xfs_filblks_t   c,
+       xfs_exntst_t    v);
+
+void
+xfs_bmbt_set_blockcount(
+       xfs_bmbt_rec_t  *r,
+       xfs_filblks_t   v);
+
+void
+xfs_bmbt_set_startblock(
+       xfs_bmbt_rec_t  *r,
+       xfs_fsblock_t   v);
+
+void
+xfs_bmbt_set_startoff(
+       xfs_bmbt_rec_t  *r,
+       xfs_fileoff_t   v);
+
+void
+xfs_bmbt_set_state(
+       xfs_bmbt_rec_t  *r,
+       xfs_exntst_t    v);
+
+void
+xfs_bmbt_to_bmdr(
+       xfs_bmbt_block_t *,
+       int,
+       xfs_bmdr_block_t *,
+       int);
+
+int
+xfs_bmbt_update(
+       struct xfs_btree_cur *,
+       xfs_fileoff_t,
+       xfs_fsblock_t,
+       xfs_filblks_t,
+       xfs_exntst_t);
+
+#ifdef XFSDEBUG
+/* 
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_bmbt_get_rec(
+       struct xfs_btree_cur *,
+       xfs_fileoff_t *,
+       xfs_fsblock_t *,
+       xfs_filblks_t *,
+       xfs_exntst_t *,
+       int *);
+#endif
+
+
+/*
+ * Search an extent list for the extent which includes block
+ * bno.
+ */
+xfs_bmbt_rec_t *
+xfs_bmap_do_search_extents(
+        xfs_bmbt_rec_t *,
+        xfs_extnum_t,
+        xfs_extnum_t,
+        xfs_fileoff_t,
+        int *,
+        xfs_extnum_t *,
+        xfs_bmbt_irec_t        *,
+        xfs_bmbt_irec_t        *);
+
+
+#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/include/xfs_btree.h b/include/xfs_btree.h
new file mode 100644 (file)
index 0000000..6f00a8c
--- /dev/null
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_BTREE_H__
+#define        __XFS_BTREE_H__
+
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * This nonsense is to make -wlint happy.
+ */
+#define        XFS_LOOKUP_EQ   ((xfs_lookup_t)XFS_LOOKUP_EQi)
+#define        XFS_LOOKUP_LE   ((xfs_lookup_t)XFS_LOOKUP_LEi)
+#define        XFS_LOOKUP_GE   ((xfs_lookup_t)XFS_LOOKUP_GEi)
+
+#define        XFS_BTNUM_BNO   ((xfs_btnum_t)XFS_BTNUM_BNOi)
+#define        XFS_BTNUM_CNT   ((xfs_btnum_t)XFS_BTNUM_CNTi)
+#define        XFS_BTNUM_BMAP  ((xfs_btnum_t)XFS_BTNUM_BMAPi)
+#define        XFS_BTNUM_INO   ((xfs_btnum_t)XFS_BTNUM_INOi)
+
+/*
+ * Short form header: space allocation btrees.
+ */
+typedef struct xfs_btree_sblock
+{
+       __uint32_t      bb_magic;       /* magic number for block type */
+       __uint16_t      bb_level;       /* 0 is a leaf */
+       __uint16_t      bb_numrecs;     /* current # of data records */
+       xfs_agblock_t   bb_leftsib;     /* left sibling block or NULLAGBLOCK */
+       xfs_agblock_t   bb_rightsib;    /* right sibling block or NULLAGBLOCK */
+} xfs_btree_sblock_t;
+
+/*
+ * Long form header: bmap btrees.
+ */
+typedef struct xfs_btree_lblock
+{
+       __uint32_t      bb_magic;       /* magic number for block type */
+       __uint16_t      bb_level;       /* 0 is a leaf */
+       __uint16_t      bb_numrecs;     /* current # of data records */
+       xfs_dfsbno_t    bb_leftsib;     /* left sibling block or NULLDFSBNO */
+       xfs_dfsbno_t    bb_rightsib;    /* right sibling block or NULLDFSBNO */
+} xfs_btree_lblock_t;
+
+/*
+ * Combined header and structure, used by common code.
+ */
+typedef struct xfs_btree_hdr
+{
+       __uint32_t      bb_magic;       /* magic number for block type */
+       __uint16_t      bb_level;       /* 0 is a leaf */
+       __uint16_t      bb_numrecs;     /* current # of data records */
+} xfs_btree_hdr_t;
+
+typedef struct xfs_btree_block
+{
+       xfs_btree_hdr_t bb_h;           /* header */
+       union           {
+               struct  {
+                       xfs_agblock_t   bb_leftsib;
+                       xfs_agblock_t   bb_rightsib;
+               }       s;              /* short form pointers */
+               struct  {
+                       xfs_dfsbno_t    bb_leftsib;
+                       xfs_dfsbno_t    bb_rightsib;
+               }       l;              /* long form pointers */
+       }               bb_u;           /* rest */
+} xfs_btree_block_t;
+
+/*
+ * For logging record fields.
+ */
+#define        XFS_BB_MAGIC            0x01
+#define        XFS_BB_LEVEL            0x02
+#define        XFS_BB_NUMRECS          0x04
+#define        XFS_BB_LEFTSIB          0x08
+#define        XFS_BB_RIGHTSIB         0x10
+#define        XFS_BB_NUM_BITS         5
+#define        XFS_BB_ALL_BITS         ((1 << XFS_BB_NUM_BITS) - 1)
+
+/*
+ * Boolean to select which form of xfs_btree_block_t.bb_u to use.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BTREE_LONG_PTRS)
+int xfs_btree_long_ptrs(xfs_btnum_t btnum);
+#define        XFS_BTREE_LONG_PTRS(btnum)      ((btnum) == XFS_BTNUM_BMAP)
+#else
+#define        XFS_BTREE_LONG_PTRS(btnum)      ((btnum) == XFS_BTNUM_BMAP)
+#endif
+
+/*
+ * Magic numbers for btree blocks.
+ */
+extern const __uint32_t        xfs_magics[];
+
+/*
+ * Maximum and minimum records in a btree block.
+ * Given block size, type prefix, and leaf flag (0 or 1).
+ * The divisor below is equivalent to lf ? (e1) : (e2) but that produces
+ * compiler warnings.
+ */
+#define        XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf)       \
+       ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \
+        (((lf) * (uint)sizeof(t ## _rec_t)) + \
+         ((1 - (lf)) * \
+          ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t))))))
+#define        XFS_BTREE_BLOCK_MINRECS(bsz,t,lf)       \
+       (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2)
+
+/*
+ * Record, key, and pointer address calculation macros.
+ * Given block size, type prefix, block pointer, index of requested entry
+ * (first entry numbered 1), and max records (mxr, used to skip past the keys).
+ */
+#define        XFS_BTREE_REC_ADDR(bsz,t,bb,i,mxr)      \
+       ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+        ((i) - 1) * sizeof(t ## _rec_t)))
+#define        XFS_BTREE_KEY_ADDR(bsz,t,bb,i,mxr)      \
+       ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+        ((i) - 1) * sizeof(t ## _key_t)))
+#define        XFS_BTREE_PTR_ADDR(bsz,t,bb,i,mxr)      \
+       ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
+        (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
+
+#define        XFS_BTREE_MAXLEVELS     8       /* max of all btrees */
+
+/*
+ * Btree cursor structure.
+ * This collects all information needed by the btree code in one place.
+ */
+typedef struct xfs_btree_cur
+{
+       struct xfs_trans        *bc_tp; /* transaction we're in, if any */
+       struct xfs_mount        *bc_mp; /* file system mount struct */
+       union {
+               xfs_alloc_rec_t         a;
+               xfs_bmbt_irec_t         b;
+               xfs_inobt_rec_t         i;
+       }               bc_rec;         /* current insert/search record value */
+       struct xfs_buf  *bc_bufs[XFS_BTREE_MAXLEVELS];  /* buf ptr per level */
+       int             bc_ptrs[XFS_BTREE_MAXLEVELS];   /* key/record # */
+       __uint8_t       bc_ra[XFS_BTREE_MAXLEVELS];     /* readahead bits */
+#define        XFS_BTCUR_LEFTRA        1       /* left sibling has been read-ahead */
+#define        XFS_BTCUR_RIGHTRA       2       /* right sibling has been read-ahead */
+       __uint8_t       bc_nlevels;     /* number of levels in the tree */
+       __uint8_t       bc_blocklog;    /* log2(blocksize) of btree blocks */
+       xfs_btnum_t     bc_btnum;       /* identifies which btree type */
+       union {
+               struct {                        /* needed for BNO, CNT */
+                       struct xfs_buf  *agbp;  /* agf buffer pointer */
+                       xfs_agnumber_t  agno;   /* ag number */
+               } a;
+               struct {                        /* needed for BMAP */
+                       struct xfs_inode *ip;   /* pointer to our inode */
+                       struct xfs_bmap_free *flist;    /* list to free after */
+                       xfs_fsblock_t   firstblock;     /* 1st blk allocated */
+                       int             allocated;      /* count of alloced */
+                       short           forksize;       /* fork's inode space */
+                       char            whichfork;      /* data or attr fork */
+                       char            flags;          /* flags */
+#define        XFS_BTCUR_BPRV_WASDEL   1                       /* was delayed */
+               } b;
+               struct {                        /* needed for INO */
+                       struct xfs_buf  *agbp;  /* agi buffer pointer */
+                       xfs_agnumber_t  agno;   /* ag number */
+               } i;
+       }               bc_private;     /* per-btree type data */
+} xfs_btree_cur_t;
+
+#define        XFS_BTREE_NOERROR       0
+#define        XFS_BTREE_ERROR         1
+
+/*
+ * Convert from buffer to btree block header.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BLOCK)
+xfs_btree_block_t *xfs_buf_to_block(struct xfs_buf *bp);
+#define        XFS_BUF_TO_BLOCK(bp)    xfs_buf_to_block(bp)
+#else
+#define        XFS_BUF_TO_BLOCK(bp)    ((xfs_btree_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_LBLOCK)
+xfs_btree_lblock_t *xfs_buf_to_lblock(struct xfs_buf *bp);
+#define        XFS_BUF_TO_LBLOCK(bp)   xfs_buf_to_lblock(bp)
+#else
+#define        XFS_BUF_TO_LBLOCK(bp)   ((xfs_btree_lblock_t *)(XFS_BUF_PTR(bp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBLOCK)
+xfs_btree_sblock_t *xfs_buf_to_sblock(struct xfs_buf *bp);
+#define        XFS_BUF_TO_SBLOCK(bp)   xfs_buf_to_sblock(bp)
+#else
+#define        XFS_BUF_TO_SBLOCK(bp)   ((xfs_btree_sblock_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+#ifdef __KERNEL__
+
+#ifdef DEBUG
+/*
+ * Debug routine: check that block header is ok.
+ */
+void
+xfs_btree_check_block(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_block_t       *block, /* generic btree block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp);   /* buffer containing block, if any */
+
+/*
+ * Debug routine: check that keys are in the right order.
+ */
+void
+xfs_btree_check_key(
+       xfs_btnum_t             btnum,  /* btree identifier */
+       void                    *ak1,   /* pointer to left (lower) key */
+       void                    *ak2);  /* pointer to right (higher) key */
+
+/*
+ * Debug routine: check that records are in the right order.
+ */
+void
+xfs_btree_check_rec(
+       xfs_btnum_t             btnum,  /* btree identifier */
+       void                    *ar1,   /* pointer to left (lower) record */
+       void                    *ar2);  /* pointer to right (higher) record */
+#else
+#define        xfs_btree_check_block(a,b,c,d)
+#define        xfs_btree_check_key(a,b,c)
+#define        xfs_btree_check_rec(a,b,c)
+#endif /* DEBUG */
+
+/*
+ * Checking routine: check that long form block header is ok.
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_lblock_t      *block, /* btree long form block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp);   /* buffer containing block, if any */
+
+/*
+ * Checking routine: check that (long) pointer is ok.
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_dfsbno_t            ptr,    /* btree block disk address */
+       int                     level); /* btree block level */
+
+/*
+ * Checking routine: check that short form block header is ok.
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_sblock_t      *block, /* btree short form block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp);   /* buffer containing block */
+
+/*
+ * Checking routine: check that (short) pointer is ok.
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_agblock_t           ptr,    /* btree block disk address */
+       int                     level); /* btree block level */
+
+/*
+ * Delete the btree cursor.
+ */
+void
+xfs_btree_del_cursor(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     error); /* del because of error */
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ */
+int                                    /* error */
+xfs_btree_dup_cursor(
+       xfs_btree_cur_t         *cur,   /* input cursor */
+       xfs_btree_cur_t         **ncur);/* output cursor */
+
+/*
+ * Change the cursor to point to the first record in the current block
+ * at the given level.  Other levels are unaffected.
+ */
+int                                    /* success=1, failure=0 */
+xfs_btree_firstrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level); /* level to change */
+
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be a bmap btree root or from a buffer.
+ */
+xfs_btree_block_t *                    /* generic btree block pointer */
+xfs_btree_get_block(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree */
+       struct xfs_buf          **bpp); /* buffer containing the block */
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Long-form addressing.
+ */
+struct xfs_buf *                               /* buffer for fsbno */
+xfs_btree_get_bufl(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_fsblock_t           fsbno,  /* file system block number */
+       uint                    lock);  /* lock flags for get_buf */
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Short-form addressing.
+ */
+struct xfs_buf *                               /* buffer for agno/agbno */
+xfs_btree_get_bufs(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       xfs_agblock_t           agbno,  /* allocation group block number */
+       uint                    lock);  /* lock flags for get_buf */
+
+/* 
+ * Allocate a new btree cursor.
+ * The cursor is either for allocation (A) or bmap (B).
+ */
+xfs_btree_cur_t *                      /* new btree cursor */
+xfs_btree_init_cursor(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       struct xfs_buf          *agbp,  /* (A only) buffer for agf structure */
+       xfs_agnumber_t          agno,   /* (A only) allocation group number */
+       xfs_btnum_t             btnum,  /* btree identifier */
+       struct xfs_inode        *ip,    /* (B only) inode owning the btree */
+       int                     whichfork); /* (B only) data/attr fork */
+
+/*
+ * Check for the cursor referring to the last block at the given level.
+ */
+int                                    /* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level); /* level to check */
+
+/*
+ * Change the cursor to point to the last record in the current block
+ * at the given level.  Other levels are unaffected.
+ */
+int                                    /* success=1, failure=0 */
+xfs_btree_lastrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level); /* level to change */
+
+/*
+ * Compute first and last byte offsets for the fields given.
+ * Interprets the offsets table, which contains struct field offsets.
+ */
+void
+xfs_btree_offsets(
+       __int64_t               fields, /* bitmask of fields */
+       const short             *offsets,/* table of field offsets */
+       int                     nbits,  /* number of bits to inspect */
+       int                     *first, /* output: first byte offset */
+       int                     *last); /* output: last byte offset */
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Long-form addressing.
+ */
+int                                    /* error */
+xfs_btree_read_bufl(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_fsblock_t           fsbno,  /* file system block number */
+       uint                    lock,   /* lock flags for read_buf */
+       struct xfs_buf          **bpp,  /* buffer for fsbno */
+       int                     refval);/* ref count value for buffer */
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Short-form addressing.
+ */
+int                                    /* error */
+xfs_btree_read_bufs(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       xfs_agblock_t           agbno,  /* allocation group block number */
+       uint                    lock,   /* lock flags for read_buf */
+       struct xfs_buf          **bpp,  /* buffer for agno/agbno */
+       int                     refval);/* ref count value for buffer */
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Long-form addressing.
+ */
+void                                   /* nothing returned */
+xfs_btree_reada_bufl(
+       struct xfs_mount        *mp,    /* file system mount point */
+       xfs_fsblock_t           fsbno,  /* file system block number */
+       xfs_extlen_t            count); /* count of filesystem blocks */
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Short-form addressing.
+ */
+void                                   /* nothing returned */
+xfs_btree_reada_bufs(
+       struct xfs_mount        *mp,    /* file system mount point */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       xfs_agblock_t           agbno,  /* allocation group block number */
+       xfs_extlen_t            count); /* count of filesystem blocks */
+
+/*
+ * Read-ahead btree blocks, at the given level.
+ * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ */
+int                                    /* readahead block count */
+xfs_btree_readahead(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     lev,    /* level in btree */
+       int                     lr);    /* left/right bits */
+/*
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
+ */
+void
+xfs_btree_setbuf(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     lev,    /* level in btree */
+       struct xfs_buf          *bp);   /* new buffer to set */
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * Min and max functions for extlen, agblock, fileoff, and filblks types.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MIN)
+xfs_extlen_t xfs_extlen_min(xfs_extlen_t a, xfs_extlen_t b);
+#define        XFS_EXTLEN_MIN(a,b)     xfs_extlen_min(a,b)
+#else
+#define        XFS_EXTLEN_MIN(a,b)     \
+       ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \
+        (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MAX)
+xfs_extlen_t xfs_extlen_max(xfs_extlen_t a, xfs_extlen_t b);
+#define        XFS_EXTLEN_MAX(a,b)     xfs_extlen_max(a,b)
+#else
+#define        XFS_EXTLEN_MAX(a,b)     \
+       ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \
+        (xfs_extlen_t)(a) : (xfs_extlen_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MIN)
+xfs_agblock_t xfs_agblock_min(xfs_agblock_t a, xfs_agblock_t b);
+#define        XFS_AGBLOCK_MIN(a,b)    xfs_agblock_min(a,b)
+#else
+#define        XFS_AGBLOCK_MIN(a,b)    \
+       ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? \
+        (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MAX)
+xfs_agblock_t xfs_agblock_max(xfs_agblock_t a, xfs_agblock_t b);
+#define        XFS_AGBLOCK_MAX(a,b)    xfs_agblock_max(a,b)
+#else
+#define        XFS_AGBLOCK_MAX(a,b)    \
+       ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \
+        (xfs_agblock_t)(a) : (xfs_agblock_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MIN)
+xfs_fileoff_t xfs_fileoff_min(xfs_fileoff_t a, xfs_fileoff_t b);
+#define        XFS_FILEOFF_MIN(a,b)    xfs_fileoff_min(a,b)
+#else
+#define        XFS_FILEOFF_MIN(a,b)    \
+       ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \
+        (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MAX)
+xfs_fileoff_t xfs_fileoff_max(xfs_fileoff_t a, xfs_fileoff_t b);
+#define        XFS_FILEOFF_MAX(a,b)    xfs_fileoff_max(a,b)
+#else
+#define        XFS_FILEOFF_MAX(a,b)    \
+       ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \
+        (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MIN)
+xfs_filblks_t xfs_filblks_min(xfs_filblks_t a, xfs_filblks_t b);
+#define        XFS_FILBLKS_MIN(a,b)    xfs_filblks_min(a,b)
+#else
+#define        XFS_FILBLKS_MIN(a,b)    \
+       ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \
+        (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MAX)
+xfs_filblks_t xfs_filblks_max(xfs_filblks_t a, xfs_filblks_t b);
+#define        XFS_FILBLKS_MAX(a,b)    xfs_filblks_max(a,b)
+#else
+#define        XFS_FILBLKS_MAX(a,b)    \
+       ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? \
+        (xfs_filblks_t)(a) : (xfs_filblks_t)(b))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_SANITY_CHECK)
+int xfs_fsb_sanity_check(struct xfs_mount *mp, xfs_fsblock_t fsb);
+#define        XFS_FSB_SANITY_CHECK(mp,fsb)    xfs_fsb_sanity_check(mp,fsb)
+#else  /* macro form: mp and fsb are evaluated more than once */
+#define        XFS_FSB_SANITY_CHECK(mp,fsb)    \
+       (XFS_FSB_TO_AGNO(mp, fsb) < (mp)->m_sb.sb_agcount && \
+        XFS_FSB_TO_AGBNO(mp, fsb) < (mp)->m_sb.sb_agblocks)
+#endif
+
+/*
+ * Macros to set EFSCORRUPTED & return/branch.
+ */
+#define        XFS_WANT_CORRUPTED_GOTO(x,l)    \
+       { \
+               int fs_is_ok = (x); \
+               ASSERT(fs_is_ok); \
+               if (!fs_is_ok) { \
+                       error = XFS_ERROR(EFSCORRUPTED); \
+                       goto l; \
+               } \
+       }
+
+#define        XFS_WANT_CORRUPTED_RETURN(x)    \
+       { \
+               int fs_is_ok = (x); \
+               ASSERT(fs_is_ok); \
+               if (!fs_is_ok) \
+                       return XFS_ERROR(EFSCORRUPTED); \
+       }
+
+#endif /* __XFS_BTREE_H__ */
diff --git a/include/xfs_buf_item.h b/include/xfs_buf_item.h
new file mode 100644 (file)
index 0000000..5d097f8
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_BUF_ITEM_H__
+#define        __XFS_BUF_ITEM_H__
+
+/*
+ * This is the structure used to lay out a buf log item in the
+ * log.  The data map describes which 128 byte chunks of the buffer
+ * have been logged.  This structure works only on buffers that
+ * reside up to the first TB in the filesystem.  These items are
+ * generated only by pre-6.2 systems and are known as XFS_LI_6_1_BUF.
+ */
+typedef struct xfs_buf_log_format_v1 {
+       unsigned short  blf_type;       /* buf log item type indicator */
+       unsigned short  blf_size;       /* size of this item */
+       __int32_t       blf_blkno;      /* starting blkno of this buf (32 bit) */
+       ushort          blf_flags;      /* misc state */
+       ushort          blf_len;        /* number of blocks in this buf */
+       unsigned int    blf_map_size;   /* size of data bitmap in words */
+       unsigned int    blf_data_map[1];/* variable size bitmap of logged */
+                                       /*   128 byte chunks of this buffer */
+} xfs_buf_log_format_v1_t;
+
+/*
+ * As above but with a 64 bit blkno, placed after blf_flags/blf_len.  For 6.2
+ * and beyond, this is XFS_LI_BUF.  We use this to log everything.
+ */
+typedef struct xfs_buf_log_format_t {
+       unsigned short  blf_type;       /* buf log item type indicator */
+       unsigned short  blf_size;       /* size of this item */
+       ushort          blf_flags;      /* misc state */
+       ushort          blf_len;        /* number of blocks in this buf */
+       __int64_t       blf_blkno;      /* starting blkno of this buf (64 bit) */
+       unsigned int    blf_map_size;   /* size of data bitmap in words */
+       unsigned int    blf_data_map[1];/* variable size bitmap of logged */
+                                       /*   128 byte chunks of this buffer */
+} xfs_buf_log_format_t;
+
+/*
+ * This flag indicates that the buffer contains on disk inodes
+ * and requires special recovery handling.
+ */
+#define        XFS_BLI_INODE_BUF       0x1
+/*
+ * This flag indicates that the buffer should not be replayed
+ * during recovery because its blocks are being freed.
+ */
+#define        XFS_BLI_CANCEL          0x2
+/*
+ * These flags indicate that the buffer contains on disk user or
+ * project dquots and may require special recovery handling.
+ */
+#define        XFS_BLI_UDQUOT_BUF      0x4
+#define        XFS_BLI_PDQUOT_BUF      0x8
+
+#define        XFS_BLI_CHUNK           128     /* presumably the 128 byte chunk tracked per data map bit -- confirm */
+#define        XFS_BLI_SHIFT           7       /* 1 << 7 == 128 == XFS_BLI_CHUNK */
+#define        BIT_TO_WORD_SHIFT       5       /* 1 << 5 == 32; presumably bits per map word -- TODO confirm */
+#define        NBWORD                  (NBBY * sizeof(unsigned int))   /* bits per unsigned int, assuming NBBY is bits per byte */
+
+/*
+ * buf log item flags (values overlap XFS_BLI_INODE_BUF..XFS_BLI_PDQUOT_BUF, so presumably a separate field, e.g. bli_flags -- confirm)
+ */
+#define        XFS_BLI_HOLD            0x01
+#define        XFS_BLI_DIRTY           0x02
+#define        XFS_BLI_STALE           0x04
+#define        XFS_BLI_LOGGED          0x08
+#define        XFS_BLI_INODE_ALLOC_BUF 0x10
+
+
+#ifdef __KERNEL__
+
+struct xfs_buf;
+struct ktrace;
+struct xfs_mount;
+
+/*
+ * This is the in core log item structure used to track information
+ * needed to log buffers.  It tracks how many times the lock has been
+ * locked, and which 128 byte chunks of the buffer are dirty.
+ */
+typedef struct xfs_buf_log_item {
+       xfs_log_item_t          bli_item;       /* common item structure */
+       struct xfs_buf          *bli_buf;       /* real buffer pointer */
+       unsigned int            bli_flags;      /* misc flags */
+       unsigned int            bli_recur;      /* lock recursion count */
+       atomic_t                bli_refcount;   /* cnt of tp refs */
+#ifdef DEBUG
+       struct ktrace           *bli_trace;     /* event trace buf */
+#endif
+#ifdef XFS_TRANS_DEBUG
+       char                    *bli_orig;      /* original buffer copy */
+       char                    *bli_logged;    /* bytes logged (bitmap) */
+#endif
+       xfs_buf_log_format_t    bli_format;     /* in-log header */
+} xfs_buf_log_item_t;
+
+/*
+ * This structure is used during recovery to record the buf log
+ * items which have been canceled and should not be replayed.
+ */
+typedef struct xfs_buf_cancel {
+       xfs_daddr_t                     bc_blkno;
+       uint                    bc_len;
+       int                     bc_refcount;
+       struct xfs_buf_cancel   *bc_next;
+} xfs_buf_cancel_t;
+
+#define        XFS_BLI_TRACE_SIZE      32
+
+
+#if defined(XFS_ALL_TRACE)     /* XFS_ALL_TRACE switches on XFS_BLI_TRACE */
+#define        XFS_BLI_TRACE
+#endif
+
+#if !defined(DEBUG)            /* XFS_BLI_TRACE is always off without DEBUG */
+#undef XFS_BLI_TRACE
+#endif
+
+#if defined(XFS_BLI_TRACE)
+void   xfs_buf_item_trace(char *, xfs_buf_log_item_t *);
+#else  /* no XFS_BLI_TRACE: xfs_buf_item_trace() calls expand to nothing */
+#define        xfs_buf_item_trace(id, bip)
+#endif
+
+void   xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
+void   xfs_buf_item_relse(struct xfs_buf *);
+void   xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
+uint   xfs_buf_item_dirty(xfs_buf_log_item_t *);
+int    xfs_buf_item_bits(uint *, uint, uint);
+int    xfs_buf_item_contig_bits(uint *, uint, uint);
+int    xfs_buf_item_next_bit(uint *, uint, uint);
+void   xfs_buf_attach_iodone(struct xfs_buf *,
+                             void(*)(struct xfs_buf *, xfs_log_item_t *),
+                             xfs_log_item_t *);
+void   xfs_buf_iodone_callbacks(struct xfs_buf *);
+void   xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *);
+
+#ifdef XFS_TRANS_DEBUG
+void
+xfs_buf_item_flush_log_debug(
+       struct xfs_buf *bp,                          
+       uint    first,
+       uint    last);
+#else
+#define        xfs_buf_item_flush_log_debug(bp, first, last)
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/include/xfs_cred.h b/include/xfs_cred.h
new file mode 100644 (file)
index 0000000..523dcdd
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __XFS_CRED_H__
+#define __XFS_CRED_H__
+
+#include <asm/param.h>         /* For NGROUPS */
+#ifdef __KERNEL__
+#include <linux/capability.h>
+#include <linux/sched.h>
+#endif
+
+/*
+ * Capabilities
+ */
+typedef __uint64_t cap_value_t;
+
+typedef struct cap_set {
+       cap_value_t     cap_effective;  /* use in capability checks */
+       cap_value_t     cap_permitted;  /* combined with file attrs */
+       cap_value_t     cap_inheritable;/* pass through exec */
+} cap_set_t;
+
+
+/*
+ * Mandatory Access Control
+ *
+ * Layout of a composite MAC label:
+ * ml_list contains the list of categories (MSEN) followed by the list of
+ * divisions (MINT). This is actually a header for the data structure which
+ * will have an ml_list with more than one element.
+ *
+ *      -------------------------------
+ *      | ml_msen_type | ml_mint_type |
+ *      -------------------------------
+ *      | ml_level     | ml_grade     |
+ *      -------------------------------
+ *      | ml_catcount                 |
+ *      -------------------------------
+ *      | ml_divcount                 |
+ *      -------------------------------
+ *      | category 1                  |
+ *      | . . .                       |
+ *      | category N                  | (where N = ml_catcount)
+ *      -------------------------------
+ *      | division 1                  |
+ *      | . . .                       |
+ *      | division M                  | (where M = ml_divcount)
+ *      -------------------------------
+ */
+#define MAC_MAX_SETS   250
+typedef struct mac_label {
+       unsigned char   ml_msen_type;   /* MSEN label type */
+       unsigned char   ml_mint_type;   /* MINT label type */
+       unsigned char   ml_level;       /* Hierarchical level  */
+       unsigned char   ml_grade;       /* Hierarchical grade  */
+       unsigned short  ml_catcount;    /* Category count */
+       unsigned short  ml_divcount;    /* Division count */
+                                       /* Category set, then Division set */
+       unsigned short  ml_list[MAC_MAX_SETS];
+} mac_label;
+
+/* Data types required by POSIX P1003.1eD15 */
+typedef struct mac_label * mac_t;
+
+#ifdef __KERNEL__
+extern int mac_enabled;
+extern mac_label *mac_high_low_lp;
+static __inline void mac_never(void) {}        /* empty body: does nothing */
+struct xfs_inode;
+extern int mac_xfs_iaccess(struct xfs_inode *, mode_t);
+#define _MAC_XFS_IACCESS(i,m)  /* 0 when mac_enabled is 0, else mac_xfs_iaccess(i,m) */ \
+       (mac_enabled? (mac_never(), mac_xfs_iaccess(i,m)): 0)
+#endif /* __KERNEL__ */
+
+#define MACWRITE       00200
+#define SGI_MAC_FILE "/dev/null"
+#define SGI_MAC_FILE_SIZE 10
+#define SGI_CAP_FILE "/dev/null"
+#define SGI_CAP_FILE_SIZE 10
+
+/* MSEN label type names. Choose an upper case ASCII character.  */
+#define MSEN_ADMIN_LABEL       'A'     /* Admin: low<admin != tcsec<high */
+#define MSEN_EQUAL_LABEL       'E'     /* Wildcard - always equal */
+#define MSEN_HIGH_LABEL                'H'     /* System High - always dominates */
+#define MSEN_MLD_HIGH_LABEL    'I'     /* System High, multi-level dir */
+#define MSEN_LOW_LABEL         'L'     /* System Low - always dominated */
+#define MSEN_MLD_LABEL         'M'     /* TCSEC label on a multi-level dir */
+#define MSEN_MLD_LOW_LABEL     'N'     /* System Low, multi-level dir */
+#define MSEN_TCSEC_LABEL       'T'     /* TCSEC label */
+#define MSEN_UNKNOWN_LABEL     'U'     /* unknown label */
+
+/* MINT label type names. Choose a lower case ASCII character.  */
+#define MINT_BIBA_LABEL                'b'     /* Dual of a TCSEC label */
+#define MINT_EQUAL_LABEL       'e'     /* Wildcard - always equal */
+#define MINT_HIGH_LABEL                'h'     /* High Grade - always dominates */
+#define MINT_LOW_LABEL         'l'     /* Low Grade - always dominated */
+
+
+/*
+ * Credentials
+ */
+typedef struct cred {
+       int     cr_ref;                 /* reference count */
+       ushort  cr_ngroups;             /* number of groups in cr_groups */
+       uid_t   cr_uid;                 /* effective user id */
+       gid_t   cr_gid;                 /* effective group id */
+       uid_t   cr_ruid;                /* real user id */
+       gid_t   cr_rgid;                /* real group id */
+       uid_t   cr_suid;                /* "saved" user id (from exec) */
+       gid_t   cr_sgid;                /* "saved" group id (from exec) */
+       struct mac_label *cr_mac;       /* MAC label for B1 and beyond */
+       cap_set_t         cr_cap;       /* capability (privilege) sets */
+       gid_t   cr_groups[NGROUPS];     /* supplementary group list */
+} cred_t;
+
+#ifdef __KERNEL__
+extern void cred_init(void);
+static __inline cred_t *get_current_cred(void) { return NULL; }        /* stub: always returns NULL */
+extern struct cred *sys_cred;
+#endif /* __KERNEL__ */
+
+#endif  /* __XFS_CRED_H__ */
diff --git a/include/xfs_da_btree.h b/include/xfs_da_btree.h
new file mode 100644 (file)
index 0000000..a9d2a15
--- /dev/null
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DA_BTREE_H__
+#define        __XFS_DA_BTREE_H__
+
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+struct zone;
+
+/*========================================================================
+ * Directory Structure when greater than XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
+ *
+ * It is used to manage a doubly linked list of all blocks at the same
+ * level in the Btree, and to identify which type of block this is.
+ */
+#define XFS_DA_NODE_MAGIC      0xfebe  /* magic number: non-leaf blocks */
+#define XFS_DIR_LEAF_MAGIC     0xfeeb  /* magic number: directory leaf blks */
+#define XFS_ATTR_LEAF_MAGIC    0xfbee  /* magic number: attribute leaf blks */
+#define        XFS_DIR2_LEAF1_MAGIC    0xd2f1  /* magic number: v2 dirlf single blks */
+#define        XFS_DIR2_LEAFN_MAGIC    0xd2ff  /* magic number: v2 dirlf multi blks */
+
+#define        XFS_DIRX_LEAF_MAGIC(mp) /* leaf magic for the mount's directory version */ \
+       (XFS_DIR_IS_V1(mp) ? XFS_DIR_LEAF_MAGIC : XFS_DIR2_LEAFN_MAGIC)
+
+typedef struct xfs_da_blkinfo {
+       xfs_dablk_t forw;                       /* forward link (following block) -- confirm */
+       xfs_dablk_t back;                       /* backward link (previous block) -- confirm */
+       __uint16_t magic;                       /* validity check on block */
+       __uint16_t pad;                         /* unused */
+} xfs_da_blkinfo_t;
+
+/*
+ * This is the structure of the root and intermediate nodes in the Btree.
+ * The leaf nodes are defined above.
+ *
+ * Entries are not packed.
+ *
+ * Since we have duplicate keys, use a binary search but always follow
+ * all matches in the block, not just the first match found.
+ */
+#define        XFS_DA_NODE_MAXDEPTH    5       /* max depth of Btree */
+
+typedef struct xfs_da_intnode {
+       struct xfs_da_node_hdr {        /* constant-structure header block */
+               xfs_da_blkinfo_t info;  /* block type, links, etc. */
+               __uint16_t count;       /* count of active entries */
+               __uint16_t level;       /* level above leaves (leaf == 0) */
+       } hdr;
+       struct xfs_da_node_entry {
+               xfs_dahash_t hashval;   /* hash value for this descendant */
+               xfs_dablk_t before;     /* Btree block before this key */
+       } btree[1];                     /* variable sized array of keys */
+} xfs_da_intnode_t;
+typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
+typedef struct xfs_da_node_entry xfs_da_node_entry_t;
+
+#define XFS_DA_NODE_ENTSIZE_BYNAME     /* space a name uses */ \
+       (sizeof(xfs_da_node_entry_t))
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_NODE_ENTRIES)
+int xfs_da_node_entries(struct xfs_mount *mp);
+#define XFS_DA_NODE_ENTRIES(mp)                xfs_da_node_entries(mp)
+#else
+#define        XFS_DA_NODE_ENTRIES(mp)         ((mp)->m_da_node_ents)
+#endif
+
+#define        XFS_DA_MAXHASH  ((xfs_dahash_t)-1) /* largest valid hash value */
+
+/*
+ * Macros used by directory code to interface to the filesystem.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBSIZE)
+int xfs_lbsize(struct xfs_mount *mp);
+#define        XFS_LBSIZE(mp)                  xfs_lbsize(mp)
+#else
+#define        XFS_LBSIZE(mp)  ((mp)->m_sb.sb_blocksize)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBLOG)
+int xfs_lblog(struct xfs_mount *mp);
+#define        XFS_LBLOG(mp)                   xfs_lblog(mp)
+#else
+#define        XFS_LBLOG(mp)   ((mp)->m_sb.sb_blocklog)
+#endif
+
+/*
+ * Macros used by directory code to interface to the kernel
+ */
+
+/*
+ * Directory off_t macros: (((bno << m_dircook_elog) | entry) << 32) | hash
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_BNOENTRY)
+__uint32_t xfs_da_make_bnoentry(struct xfs_mount *mp, xfs_dablk_t bno,
+                               int entry);
+#define        XFS_DA_MAKE_BNOENTRY(mp,bno,entry)      \
+       xfs_da_make_bnoentry(mp,bno,entry)
+#else
+#define        XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
+       (((bno) << (mp)->m_dircook_elog) | (entry))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_COOKIE)
+xfs_off_t xfs_da_make_cookie(struct xfs_mount *mp, xfs_dablk_t bno, int entry,
+                               xfs_dahash_t hash);
+#define        XFS_DA_MAKE_COOKIE(mp,bno,entry,hash)   \
+       xfs_da_make_cookie(mp,bno,entry,hash)
+#else
+#define        XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
+       (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_HASH)
+xfs_dahash_t xfs_da_cookie_hash(struct xfs_mount *mp, xfs_off_t cookie);
+#define        XFS_DA_COOKIE_HASH(mp,cookie)           xfs_da_cookie_hash(mp,cookie)
+#else
+#define        XFS_DA_COOKIE_HASH(mp,cookie)   ((xfs_dahash_t)(cookie))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_BNO)
+xfs_dablk_t xfs_da_cookie_bno(struct xfs_mount *mp, xfs_off_t cookie);
+#define        XFS_DA_COOKIE_BNO(mp,cookie)            xfs_da_cookie_bno(mp,cookie)
+#else
+#define        XFS_DA_COOKIE_BNO(mp,cookie) \
+       (((xfs_off_t)(cookie) >> 31) == -1LL ? \
+               (xfs_dablk_t)0 : \
+               (xfs_dablk_t)((xfs_off_t)(cookie) >> ((mp)->m_dircook_elog + 32)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_ENTRY)
+int xfs_da_cookie_entry(struct xfs_mount *mp, xfs_off_t cookie);
+#define        XFS_DA_COOKIE_ENTRY(mp,cookie)          xfs_da_cookie_entry(mp,cookie)
+#else
+#define        XFS_DA_COOKIE_ENTRY(mp,cookie) \
+       (((xfs_off_t)(cookie) >> 31) == -1LL ? \
+               (xfs_dablk_t)0 : \
+               (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
+                             ((1 << (mp)->m_dircook_elog) - 1)))
+#endif
+
+
+/*========================================================================
+ * Btree searching and modification structure definitions.
+ *========================================================================*/
+
+/*
+ * Structure to ease passing around component names.
+ */
+typedef struct xfs_da_args {
+       char            *name;          /* string (maybe not NUL terminated) */
+       int             namelen;        /* length of string (maybe no NUL) */
+       char            *value;         /* set of bytes (may contain NULs) */
+       int             valuelen;       /* length of value */
+       int             flags;          /* argument flags (eg: ATTR_NOCREATE) */
+       xfs_dahash_t    hashval;        /* hash value of name */
+       xfs_ino_t       inumber;        /* input/output inode number */
+       struct xfs_inode *dp;           /* directory inode to manipulate */
+       xfs_fsblock_t   *firstblock;    /* ptr to firstblock for bmap calls */
+       struct xfs_bmap_free *flist;    /* ptr to freelist for bmap_finish */
+       struct xfs_trans *trans;        /* current trans (changes over time) */
+       xfs_extlen_t    total;          /* total blocks needed, for 1st bmap */
+       int             whichfork;      /* data or attribute fork */
+       xfs_dablk_t     blkno;          /* blkno of attr leaf of interest */
+       int             index;          /* index of attr of interest in blk */
+       xfs_dablk_t     rmtblkno;       /* remote attr value starting blkno */
+       int             rmtblkcnt;      /* remote attr value block count */
+       int             rename;         /* T/F: this is an atomic rename op */
+       xfs_dablk_t     blkno2;         /* blkno of 2nd attr leaf of interest */
+       int             index2;         /* index of 2nd attr in blk */
+       xfs_dablk_t     rmtblkno2;      /* 2nd remote attr value starting blkno */
+       int             rmtblkcnt2;     /* 2nd remote attr value block count */
+       int             justcheck;      /* check for ok with no space */
+       int             addname;        /* T/F: this is an add operation */
+       int             oknoent;        /* T/F: ok to return ENOENT, else die */
+} xfs_da_args_t;
+
+/*
+ * Structure to describe buffer(s) for a block.
+ * This is needed in the directory version 2 format case, when 
+ * multiple non-contiguous fsblocks might be needed to cover one
+ * logical directory block.
+ * If the buffer count is 1 then the data pointer points to the
+ * same place as the b_addr field for the buffer, else to kmem_alloced memory.
+ */
+typedef struct xfs_dabuf {
+       int             nbuf;           /* number of buffer pointers present */
+       short           dirty;          /* data needs to be copied back */
+       short           bbcount;        /* how large is data in bbs */
+       void            *data;          /* pointer for buffers' data */
+#ifdef XFS_DABUF_DEBUG
+       inst_t          *ra;            /* return address of caller to make */
+       struct xfs_dabuf *next;         /* next in global chain */
+       struct xfs_dabuf *prev;         /* previous in global chain */
+       dev_t           dev;            /* device for buffer */
+       xfs_daddr_t             blkno;          /* daddr first in bps[0] */
+#endif
+       struct xfs_buf  *bps[1];        /* actually nbuf of these */
+} xfs_dabuf_t;
+#define        XFS_DA_BUF_SIZE(n)      \
+       (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
+
+#ifdef XFS_DABUF_DEBUG
+extern xfs_dabuf_t     *xfs_dabuf_global_list;
+#endif
+
+/*
+ * Storage for holding state during Btree searches and split/join ops.
+ *
+ * Only need space for 5 intermediate nodes.  With a minimum of 62-way
+ * fanout to the Btree, we can support over 900 million directory blocks,
+ * which is slightly more than enough.
+ */
+typedef struct xfs_da_state_blk {
+       xfs_dabuf_t     *bp;            /* buffer containing block */
+       xfs_dablk_t     blkno;          /* filesystem blkno of buffer */
+       xfs_daddr_t             disk_blkno;     /* on-disk blkno (in BBs) of buffer */
+       int             index;          /* relevant index into block */
+       xfs_dahash_t    hashval;        /* last hash value in block */
+       int             magic;          /* blk's magic number, ie: blk type */
+} xfs_da_state_blk_t;
+
+typedef struct xfs_da_state_path {
+       int                     active;         /* number of active levels */
+       xfs_da_state_blk_t      blk[XFS_DA_NODE_MAXDEPTH];
+} xfs_da_state_path_t;
+
+typedef struct xfs_da_state {
+       xfs_da_args_t           *args;          /* filename arguments */
+       struct xfs_mount        *mp;            /* filesystem mount point */
+       int                     blocksize;      /* logical block size */
+       int                     inleaf;         /* insert into 1->lf, 0->splf */
+       xfs_da_state_path_t     path;           /* search/split paths */
+       xfs_da_state_path_t     altpath;        /* alternate path for join */
+       int                     extravalid;     /* T/F: extrablk is in use */
+       int                     extraafter;     /* T/F: extrablk is after new */
+       xfs_da_state_blk_t      extrablk;       /* for double-splits on leafs */
+                                               /* for dirv2 extrablk is data */
+} xfs_da_state_t;
+
+/*
+ * Field-logging helpers; XFS_DA_LOGRANGE expands to 2 args: first, last offset.
+ */
+#define XFS_DA_LOGOFF(BASE, ADDR)      ((char *)(ADDR) - (char *)(BASE))
+#define XFS_DA_LOGRANGE(BASE, ADDR, SIZE)      \
+               (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
+               (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Routines used for growing the Btree.
+ */
+int    xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
+                                        xfs_dabuf_t **bpp, int whichfork);
+int    xfs_da_split(xfs_da_state_t *state);
+
+/*
+ * Routines used for shrinking the Btree (both operate on an xfs_da_state_t).
+ */
+int    xfs_da_join(xfs_da_state_t *state);
+void   xfs_da_fixhashpath(xfs_da_state_t *state,
+                                         xfs_da_state_path_t *path_to_fix);
+
+/*
+ * Routines used for finding things in the Btree.
+ */
+int    xfs_da_node_lookup_int(xfs_da_state_t *state, int *result);
+int    xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
+                                        int forward, int release, int *result);
+/*
+ * Utility routines.
+ */
+int    xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+                                        xfs_da_state_blk_t *save_blk);
+int    xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
+                                      xfs_da_state_blk_t *new_blk);
+
+/*
+ * Utility routines.
+ */
+int    xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
+int    xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+                             xfs_dablk_t bno, xfs_daddr_t mappedbno,
+                             xfs_dabuf_t **bp, int whichfork);
+int    xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+                              xfs_dablk_t bno, xfs_daddr_t mappedbno,
+                              xfs_dabuf_t **bpp, int whichfork);
+xfs_daddr_t    xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
+                       xfs_dablk_t bno, int whichfork);
+int    xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
+                                         xfs_dabuf_t *dead_buf);
+
+uint xfs_da_hashname(char *name_string, int name_length);
+uint xfs_da_log2_roundup(uint i);
+xfs_da_state_t *xfs_da_state_alloc(void);
+void xfs_da_state_free(xfs_da_state_t *state);
+void xfs_da_state_kill_altpath(xfs_da_state_t *state);
+
+void xfs_da_buf_done(xfs_dabuf_t *dabuf);
+void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
+                          uint last);
+void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
+void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
+xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
+
+extern struct xfs_zone *xfs_da_state_zone;
+
+#endif /* __XFS_DA_BTREE_H__ */
diff --git a/include/xfs_dfrag.h b/include/xfs_dfrag.h
new file mode 100644 (file)
index 0000000..a6f1b09
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DFRAG_H__
+#define        __XFS_DFRAG_H__
+
+/*
+ * Structure passed to xfs_swapext
+ */
+
+typedef struct xfs_swapext
+{
+       __int64_t       sx_version;     /* version */
+       __int64_t       sx_fdtarget;    /* fd of target file */
+       __int64_t       sx_fdtmp;       /* fd of tmp file */
+       xfs_off_t       sx_offset;      /* offset into file */
+       xfs_off_t       sx_length;      /* length from offset */
+       char            sx_pad[16];     /* pad space, unused */
+       xfs_bstat_t     sx_stat;        /* stat of target before copy */
+} xfs_swapext_t;
+
+/* 
+ * Version flag
+ */
+#define XFS_SX_VERSION         0
+
+#ifdef __KERNEL__
+/*
+ * Prototypes for visible xfs_dfrag.c routines.
+ */
+
+/*
+ * Syscall interface for xfs_swapext
+ */
+int    xfs_swapext(struct xfs_swapext *sx);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_DFRAG_H__ */
diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h
new file mode 100644 (file)
index 0000000..7bda53e
--- /dev/null
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DINODE_H__
+#define        __XFS_DINODE_H__
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define        XFS_DINODE_VERSION_1    1
+#define        XFS_DINODE_VERSION_2    2
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DINODE_GOOD_VERSION)
+int xfs_dinode_good_version(int v);
+#define XFS_DINODE_GOOD_VERSION(v)     xfs_dinode_good_version(v)
+#else
+#define XFS_DINODE_GOOD_VERSION(v)     (((v) == XFS_DINODE_VERSION_1) || \
+                                        ((v) == XFS_DINODE_VERSION_2))
+#endif
+#define        XFS_DINODE_MAGIC        0x494e  /* 'IN' */
+
+/*
+ * Disk inode structure.
+ * This is just the header; the on-disk inode is variable-sized, with the
+ * last field expanding to fill it.  It is split into the core and "other"
+ * because we only need the core part in the in-core inode.
+ */
+typedef struct xfs_timestamp {
+       __int32_t       t_sec;          /* timestamp seconds */
+       __int32_t       t_nsec;         /* timestamp nanoseconds */
+} xfs_timestamp_t;
+
+/*
+ * Note: Coordinate changes to this structure with the XFS_DI_* #defines
+ * below and the offsets table in xfs_ialloc_log_di().
+ */
+typedef struct xfs_dinode_core
+{
+       __uint16_t      di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
+       __uint16_t      di_mode;        /* mode and type of file */
+       __int8_t        di_version;     /* inode version */
+       __int8_t        di_format;      /* format of data fork's data */
+       __uint16_t      di_onlink;      /* old number of links to file */
+       __uint32_t      di_uid;         /* owner's user id */
+       __uint32_t      di_gid;         /* owner's group id */
+       __uint32_t      di_nlink;       /* number of links to file */
+       __uint16_t      di_projid;      /* owner's project id */
+       __uint8_t       di_pad[10];     /* unused, zeroed space */
+       xfs_timestamp_t di_atime;       /* time last accessed */
+       xfs_timestamp_t di_mtime;       /* time last modified */
+       xfs_timestamp_t di_ctime;       /* time created/inode modified */
+       xfs_fsize_t     di_size;        /* number of bytes in file */
+       xfs_drfsbno_t   di_nblocks;     /* # of direct & btree blocks used */
+       xfs_extlen_t    di_extsize;     /* basic/minimum extent size for file */
+       xfs_extnum_t    di_nextents;    /* number of extents in data fork */
+       xfs_aextnum_t   di_anextents;   /* number of extents in attribute fork */
+       __uint8_t       di_forkoff;     /* attr fork offs, <<3 for 64b align */
+       __int8_t        di_aformat;     /* format of attr fork's data */
+       __uint32_t      di_dmevmask;    /* DMIG event mask */
+       __uint16_t      di_dmstate;     /* DMIG state info */
+       __uint16_t      di_flags;       /* random flags, XFS_DIFLAG_... */
+       __uint32_t      di_gen;         /* generation number */
+} xfs_dinode_core_t;
+
+typedef struct xfs_dinode
+{
+       xfs_dinode_core_t       di_core;
+       /*
+        * In adding anything between the core and the union, be
+        * sure to update the macros like XFS_LITINO below and
+        * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
+        */
+       xfs_agino_t             di_next_unlinked;/* agi unlinked list ptr */
+       union {
+               xfs_bmdr_block_t di_bmbt;       /* btree root block */
+               xfs_bmbt_rec_32_t di_bmx[1];    /* extent list */
+               xfs_dir_shortform_t di_dirsf;   /* shortform directory */
+               xfs_dir2_sf_t   di_dir2sf;      /* shortform directory v2 */
+               char            di_c[1];        /* local contents */
+               xfs_dev_t       di_dev;         /* device for IFCHR/IFBLK */
+               uuid_t          di_muuid;       /* mount point value */
+               char            di_symlink[1];  /* local symbolic link */
+       }               di_u;
+       union {
+               xfs_bmdr_block_t di_abmbt;      /* btree root block */
+               xfs_bmbt_rec_32_t di_abmx[1];   /* extent list */
+               xfs_attr_shortform_t di_attrsf; /* shortform attribute list */
+       }               di_a;
+} xfs_dinode_t;
+
+/*
+ * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
+ * Since the pathconf interface is signed, we use 2^31 - 1 instead.
+ * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
+ */
+#define        XFS_MAXLINK             ((1U << 31) - 1U)
+#define        XFS_MAXLINK_1           65535U
+
+/*
+ * Bit names for logging disk inodes only
+ */
+#define        XFS_DI_MAGIC            0x0000001
+#define        XFS_DI_MODE             0x0000002
+#define        XFS_DI_VERSION          0x0000004
+#define        XFS_DI_FORMAT           0x0000008
+#define        XFS_DI_ONLINK           0x0000010
+#define        XFS_DI_UID              0x0000020
+#define        XFS_DI_GID              0x0000040
+#define        XFS_DI_NLINK            0x0000080
+#define        XFS_DI_PROJID           0x0000100
+#define        XFS_DI_PAD              0x0000200
+#define        XFS_DI_ATIME            0x0000400
+#define        XFS_DI_MTIME            0x0000800
+#define        XFS_DI_CTIME            0x0001000
+#define        XFS_DI_SIZE             0x0002000
+#define        XFS_DI_NBLOCKS          0x0004000
+#define        XFS_DI_EXTSIZE          0x0008000
+#define        XFS_DI_NEXTENTS         0x0010000
+#define        XFS_DI_NAEXTENTS        0x0020000
+#define        XFS_DI_FORKOFF          0x0040000
+#define        XFS_DI_AFORMAT          0x0080000
+#define        XFS_DI_DMEVMASK         0x0100000
+#define        XFS_DI_DMSTATE          0x0200000
+#define        XFS_DI_FLAGS            0x0400000
+#define        XFS_DI_GEN              0x0800000
+#define        XFS_DI_NEXT_UNLINKED    0x1000000
+#define        XFS_DI_U                0x2000000
+#define        XFS_DI_A                0x4000000
+#define        XFS_DI_NUM_BITS         27
+#define        XFS_DI_ALL_BITS         ((1 << XFS_DI_NUM_BITS) - 1)
+#define        XFS_DI_CORE_BITS        (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A))
+
+/*
+ * Values for di_format
+ */
+typedef enum xfs_dinode_fmt
+{
+       XFS_DINODE_FMT_DEV,             /* CHR, BLK: di_dev */
+       XFS_DINODE_FMT_LOCAL,           /* DIR, REG: di_c */
+                                       /* LNK: di_symlink */
+       XFS_DINODE_FMT_EXTENTS,         /* DIR, REG, LNK: di_bmx */
+       XFS_DINODE_FMT_BTREE,           /* DIR, REG, LNK: di_bmbt */
+       XFS_DINODE_FMT_UUID             /* MNT: di_muuid */
+} xfs_dinode_fmt_t;
+
+/*
+ * Inode minimum and maximum sizes.
+ */
+#define        XFS_DINODE_MIN_LOG      8
+#define        XFS_DINODE_MAX_LOG      11
+#define        XFS_DINODE_MIN_SIZE     (1 << XFS_DINODE_MIN_LOG)
+#define        XFS_DINODE_MAX_SIZE     (1 << XFS_DINODE_MAX_LOG)
+
+/*
+ * Inode size for given fs.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LITINO)
+int xfs_litino(struct xfs_mount *mp);
+#define        XFS_LITINO(mp)          xfs_litino(mp)
+#else
+#define        XFS_LITINO(mp)  ((mp)->m_litino)
+#endif
+#define        XFS_BROOT_SIZE_ADJ      \
+       (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t))
+
+/*
+ * Fork identifiers.  Here so utilities can use them without including
+ * xfs_inode.h.
+ */
+#define        XFS_DATA_FORK   0
+#define        XFS_ATTR_FORK   1
+
+/*
+ * Inode data & attribute fork sizes, per inode.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_Q)
+int xfs_cfork_q_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch);
+int xfs_cfork_q(xfs_dinode_core_t *dcp);
+#define        XFS_CFORK_Q_ARCH(dcp,arch)          xfs_cfork_q_arch(dcp,arch)
+#define        XFS_CFORK_Q(dcp)                    xfs_cfork_q(dcp)
+#else
+#define        XFS_CFORK_Q_ARCH(dcp,arch)          (INT_GET((dcp)->di_forkoff, arch) != 0)
+#define XFS_CFORK_Q(dcp)                    XFS_CFORK_Q_ARCH(dcp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF)
+int xfs_cfork_boff_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch);
+int xfs_cfork_boff(xfs_dinode_core_t *dcp);
+#define        XFS_CFORK_BOFF_ARCH(dcp,arch)       xfs_cfork_boff_arch(dcp,arch)
+#define        XFS_CFORK_BOFF(dcp)                 xfs_cfork_boff(dcp)
+#else
+#define        XFS_CFORK_BOFF_ARCH(dcp,arch)       ((int)(INT_GET((dcp)->di_forkoff, arch) << 3))
+#define XFS_CFORK_BOFF(dcp)                 XFS_CFORK_BOFF_ARCH(dcp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE)
+int xfs_cfork_dsize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_cfork_dsize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
+#define        XFS_CFORK_DSIZE_ARCH(dcp,mp,arch)   xfs_cfork_dsize_arch(dcp,mp,arch)
+#define        XFS_CFORK_DSIZE(dcp,mp)             xfs_cfork_dsize(dcp,mp)
+#else
+#define        XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) \
+       (XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_CFORK_BOFF_ARCH(dcp, arch) : XFS_LITINO(mp))
+#define XFS_CFORK_DSIZE(dcp,mp)             XFS_CFORK_DSIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE)
+int xfs_cfork_asize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_cfork_asize(xfs_dinode_core_t *dcp, struct xfs_mount *mp);
+#define        XFS_CFORK_ASIZE_ARCH(dcp,mp,arch)   xfs_cfork_asize_arch(dcp,mp,arch)
+#define        XFS_CFORK_ASIZE(dcp,mp)             xfs_cfork_asize(dcp,mp) 
+#else
+#define        XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) \
+       (XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_ARCH(dcp, arch) : 0)
+#define XFS_CFORK_ASIZE(dcp,mp)             XFS_CFORK_ASIZE_ARCH(dcp,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE)
+int xfs_cfork_size_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w, xfs_arch_t arch);
+int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w);
+#define        XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch)  xfs_cfork_size_arch(dcp,mp,w,arch)
+#define        XFS_CFORK_SIZE(dcp,mp,w)            xfs_cfork_size(dcp,mp,w)
+#else
+#define        XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) \
+       ((w) == XFS_DATA_FORK ? \
+               XFS_CFORK_DSIZE_ARCH(dcp, mp, arch) : XFS_CFORK_ASIZE_ARCH(dcp, mp, arch))
+#define XFS_CFORK_SIZE(dcp,mp,w)            XFS_CFORK_SIZE_ARCH(dcp,mp,w,ARCH_NOCONVERT)
+
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DSIZE)
+int xfs_dfork_dsize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_dfork_dsize(xfs_dinode_t *dip, struct xfs_mount *mp);
+#define        XFS_DFORK_DSIZE_ARCH(dip,mp,arch)   xfs_dfork_dsize_arch(dip,mp,arch)
+#define        XFS_DFORK_DSIZE(dip,mp)             xfs_dfork_dsize(dip,mp)
+#else
+#define        XFS_DFORK_DSIZE_ARCH(dip,mp,arch)   XFS_CFORK_DSIZE_ARCH(&(dip)->di_core, mp, arch)
+#define XFS_DFORK_DSIZE(dip,mp)             XFS_DFORK_DSIZE_ARCH(dip,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_ASIZE)
+int xfs_dfork_asize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch);
+int xfs_dfork_asize(xfs_dinode_t *dip, struct xfs_mount *mp);
+#define        XFS_DFORK_ASIZE_ARCH(dip,mp,arch)   xfs_dfork_asize_arch(dip,mp,arch)
+#define        XFS_DFORK_ASIZE(dip,mp)             xfs_dfork_asize(dip,mp)
+#else
+#define        XFS_DFORK_ASIZE_ARCH(dip,mp,arch)   XFS_CFORK_ASIZE_ARCH(&(dip)->di_core, mp, arch)
+#define XFS_DFORK_ASIZE(dip,mp)             XFS_DFORK_ASIZE_ARCH(dip,mp,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_SIZE)
+int xfs_dfork_size_arch(xfs_dinode_t *dip, struct xfs_mount *mp, int w, xfs_arch_t arch);
+int xfs_dfork_size(xfs_dinode_t *dip, struct xfs_mount *mp, int w);
+#define        XFS_DFORK_SIZE_ARCH(dip,mp,w,arch)  xfs_dfork_size_arch(dip,mp,w,arch)
+#define        XFS_DFORK_SIZE(dip,mp,w)            xfs_dfork_size(dip,mp,w) 
+#else
+#define        XFS_DFORK_SIZE_ARCH(dip,mp,w,arch)  XFS_CFORK_SIZE_ARCH(&(dip)->di_core, mp, w, arch)
+#define XFS_DFORK_SIZE(dip,mp,w)            XFS_DFORK_SIZE_ARCH(dip,mp,w,ARCH_NOCONVERT)
+
+#endif
+
+/*
+ * Macros for accessing per-fork disk inode information.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_Q)
+int xfs_dfork_q_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+int xfs_dfork_q(xfs_dinode_t *dip);
+#define        XFS_DFORK_Q_ARCH(dip,arch)          xfs_dfork_q_arch(dip,arch)
+#define        XFS_DFORK_Q(dip)                    xfs_dfork_q(dip)
+#else
+#define        XFS_DFORK_Q_ARCH(dip,arch)          XFS_CFORK_Q_ARCH(&(dip)->di_core, arch)
+#define XFS_DFORK_Q(dip)                    XFS_DFORK_Q_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_BOFF)
+int xfs_dfork_boff_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+int xfs_dfork_boff(xfs_dinode_t *dip);
+#define        XFS_DFORK_BOFF_ARCH(dip,arch)       xfs_dfork_boff_arch(dip,arch)
+#define        XFS_DFORK_BOFF(dip)                 xfs_dfork_boff(dip)
+#else
+#define        XFS_DFORK_BOFF_ARCH(dip,arch)       XFS_CFORK_BOFF_ARCH(&(dip)->di_core, arch)
+#define XFS_DFORK_BOFF(dip)                 XFS_DFORK_BOFF_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DPTR)
+char *xfs_dfork_dptr_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+char *xfs_dfork_dptr(xfs_dinode_t *dip);
+#define        XFS_DFORK_DPTR_ARCH(dip,arch)       xfs_dfork_dptr_arch(dip,arch)
+#define        XFS_DFORK_DPTR(dip)                 xfs_dfork_dptr(dip)
+#else
+#define        XFS_DFORK_DPTR_ARCH(dip,arch)       ((dip)->di_u.di_c)
+#define XFS_DFORK_DPTR(dip)                 XFS_DFORK_DPTR_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_APTR)
+char *xfs_dfork_aptr_arch(xfs_dinode_t *dip, xfs_arch_t arch);
+char *xfs_dfork_aptr(xfs_dinode_t *dip);
+#define        XFS_DFORK_APTR_ARCH(dip,arch)       xfs_dfork_aptr_arch(dip,arch)
+#define        XFS_DFORK_APTR(dip)                 xfs_dfork_aptr(dip) 
+#else
+#define        XFS_DFORK_APTR_ARCH(dip,arch)       ((dip)->di_u.di_c + XFS_DFORK_BOFF_ARCH(dip, arch))
+#define XFS_DFORK_APTR(dip)                 XFS_DFORK_APTR_ARCH(dip,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_PTR)
+char *xfs_dfork_ptr_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+char *xfs_dfork_ptr(xfs_dinode_t *dip, int w);
+#define        XFS_DFORK_PTR_ARCH(dip,w,arch)      xfs_dfork_ptr_arch(dip,w,arch)
+#define        XFS_DFORK_PTR(dip,w)                xfs_dfork_ptr(dip,w)
+#else
+#define        XFS_DFORK_PTR_ARCH(dip,w,arch)  \
+       ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR_ARCH(dip, arch) : XFS_DFORK_APTR_ARCH(dip, arch))
+#define XFS_DFORK_PTR(dip,w)                XFS_DFORK_PTR_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FORMAT)
+int xfs_cfork_format_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch);
+int xfs_cfork_format(xfs_dinode_core_t *dcp, int w);
+#define        XFS_CFORK_FORMAT_ARCH(dcp,w,arch)   xfs_cfork_format_arch(dcp,w,arch)
+#define        XFS_CFORK_FORMAT(dcp,w)             xfs_cfork_format(dcp,w)
+#else
+#define        XFS_CFORK_FORMAT_ARCH(dcp,w,arch) \
+       ((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_format, arch) : INT_GET((dcp)->di_aformat, arch))
+#define XFS_CFORK_FORMAT(dcp,w)             XFS_CFORK_FORMAT_ARCH(dcp,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FMT_SET)
+void xfs_cfork_fmt_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch);
+void xfs_cfork_fmt_set(xfs_dinode_core_t *dcp, int w, int n);
+#define        XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) xfs_cfork_fmt_set_arch(dcp,w,n,arch)
+#define        XFS_CFORK_FMT_SET(dcp,w,n)           xfs_cfork_fmt_set(dcp,w,n)
+#else
+#define        XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) \
+       ((w) == XFS_DATA_FORK ? \
+               (INT_SET((dcp)->di_format, arch, (n))) : \
+               (INT_SET((dcp)->di_aformat, arch, (n))))
+#define XFS_CFORK_FMT_SET(dcp,w,n)           XFS_CFORK_FMT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXTENTS)
+int xfs_cfork_nextents_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch);
+int xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w);
+#define        XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch)  xfs_cfork_nextents_arch(dcp,w,arch)
+#define        XFS_CFORK_NEXTENTS(dcp,w)            xfs_cfork_nextents(dcp,w)
+#else
+#define        XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch) \
+       ((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_nextents, arch) : INT_GET((dcp)->di_anextents, arch))
+#define XFS_CFORK_NEXTENTS(dcp,w)            XFS_CFORK_NEXTENTS_ARCH(dcp,w,ARCH_NOCONVERT) 
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXT_SET)
+void xfs_cfork_next_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch);
+void xfs_cfork_next_set(xfs_dinode_core_t *dcp, int w, int n);
+#define        XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch)   xfs_cfork_next_set_arch(dcp,w,n,arch)
+#define        XFS_CFORK_NEXT_SET(dcp,w,n)             xfs_cfork_next_set(dcp,w,n)
+#else
+#define        XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch) \
+       ((w) == XFS_DATA_FORK ? \
+               (INT_SET((dcp)->di_nextents, arch, (n))) : \
+               (INT_SET((dcp)->di_anextents, arch, (n))))
+#define XFS_CFORK_NEXT_SET(dcp,w,n)             XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FORMAT)
+int xfs_dfork_format_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+int xfs_dfork_format(xfs_dinode_t *dip, int w);
+#define        XFS_DFORK_FORMAT_ARCH(dip,w,arch)   xfs_dfork_format_arch(dip,w,arch)
+#define        XFS_DFORK_FORMAT(dip,w)             xfs_dfork_format(dip,w)
+#else
+#define        XFS_DFORK_FORMAT_ARCH(dip,w,arch)   XFS_CFORK_FORMAT_ARCH(&(dip)->di_core, w, arch)
+#define XFS_DFORK_FORMAT(dip,w)             XFS_DFORK_FORMAT_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FMT_SET)
+void xfs_dfork_fmt_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch);
+void xfs_dfork_fmt_set(xfs_dinode_t *dip, int w, int n);
+#define        XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch)    xfs_dfork_fmt_set_arch(dip,w,n,arch)
+#define        XFS_DFORK_FMT_SET(dip,w,n)              xfs_dfork_fmt_set(dip,w,n)
+#else
+#define        XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch)    XFS_CFORK_FMT_SET_ARCH(&(dip)->di_core, w, n, arch)
+#define XFS_DFORK_FMT_SET(dip,w,n)              XFS_DFORK_FMT_SET_ARCH(dip,w,n,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXTENTS)
+int xfs_dfork_nextents_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch);
+int xfs_dfork_nextents(xfs_dinode_t *dip, int w);
+#define        XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) xfs_dfork_nextents_arch(dip,w,arch)
+#define        XFS_DFORK_NEXTENTS(dip,w)           xfs_dfork_nextents(dip,w)
+#else
+#define        XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) XFS_CFORK_NEXTENTS_ARCH(&(dip)->di_core, w, arch)
+#define XFS_DFORK_NEXTENTS(dip,w)           XFS_DFORK_NEXTENTS_ARCH(dip,w,ARCH_NOCONVERT)
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXT_SET)
+void xfs_dfork_next_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch);
+void xfs_dfork_next_set(xfs_dinode_t *dip, int w, int n);
+#define        XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch)   xfs_dfork_next_set_arch(dip,w,n,arch)
+#define        XFS_DFORK_NEXT_SET(dip,w,n)             xfs_dfork_next_set(dip,w,n)
+#else
+#define        XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch)   XFS_CFORK_NEXT_SET_ARCH(&(dip)->di_core, w, n, arch)
+#define XFS_DFORK_NEXT_SET(dip,w,n)             XFS_DFORK_NEXT_SET_ARCH(dip,w,n,ARCH_NOCONVERT)
+
+#endif
+
+/*
+ * File types (mode field)
+ */
+#define        IFMT            0170000         /* type of file */
+#define        IFIFO           0010000         /* named pipe (fifo) */
+#define        IFCHR           0020000         /* character special */
+#define        IFDIR           0040000         /* directory */
+#define        IFBLK           0060000         /* block special */
+#define        IFREG           0100000         /* regular */
+#define        IFLNK           0120000         /* symbolic link */
+#define        IFSOCK          0140000         /* socket */
+#define        IFMNT           0160000         /* mount point */
+
+/*
+ * File execution and access modes.
+ */
+#define        ISUID           04000           /* set user id on execution */
+#define        ISGID           02000           /* set group id on execution */
+#define        ISVTX           01000           /* sticky directory */
+#define        IREAD           0400            /* read, write, execute permissions */
+#define        IWRITE          0200
+#define        IEXEC           0100
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_DINODE)
+xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
+#define        XFS_BUF_TO_DINODE(bp)   xfs_buf_to_dinode(bp)
+#else
+#define        XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Values for di_flags
+ * There should be a one-to-one correspondence between these flags and the
+ * XFS_XFLAG_s.
+ */
+#define XFS_DIFLAG_REALTIME_BIT        0       /* file's blocks come from rt area */
+#define XFS_DIFLAG_PREALLOC_BIT        1       /* file space has been preallocated */
+#define        XFS_DIFLAG_NEWRTBM_BIT  2       /* for rtbitmap inode, new format */
+#define XFS_DIFLAG_REALTIME     (1 << XFS_DIFLAG_REALTIME_BIT)
+#define XFS_DIFLAG_PREALLOC    (1 << XFS_DIFLAG_PREALLOC_BIT)
+#define        XFS_DIFLAG_NEWRTBM      (1 << XFS_DIFLAG_NEWRTBM_BIT)
+#define XFS_DIFLAG_ALL  \
+       (XFS_DIFLAG_REALTIME|XFS_DIFLAG_PREALLOC|XFS_DIFLAG_NEWRTBM)
+
+#endif /* __XFS_DINODE_H__ */
diff --git a/include/xfs_dir.h b/include/xfs_dir.h
new file mode 100644 (file)
index 0000000..ead2621
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_H__
+#define        __XFS_DIR_H__
+
+/*
+ * Large directories are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Filenames are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of a filename may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ *
+ * Small directories use a different format and are packed as tightly
+ * as possible so as to fit into the literal area of the inode.
+ */
+
+#ifdef XFS_ALL_TRACE
+#define        XFS_DIR_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_DIR_TRACE
+#endif
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+struct uio;
+struct xfs_bmap_free;
+struct xfs_da_args;
+struct xfs_dinode;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Directory function types.
+ * Put in structures (xfs_dirops_t) for v1 and v2 directories.
+ */
+typedef void   (*xfs_dir_mount_t)(struct xfs_mount *mp);
+typedef int    (*xfs_dir_isempty_t)(struct xfs_inode *dp);
+typedef int    (*xfs_dir_init_t)(struct xfs_trans *tp,
+                                 struct xfs_inode *dp,
+                                 struct xfs_inode *pdp);
+typedef int    (*xfs_dir_createname_t)(struct xfs_trans *tp,
+                                       struct xfs_inode *dp,
+                                       char *name,
+                                       int namelen,
+                                       xfs_ino_t inum,
+                                       xfs_fsblock_t *first,
+                                       struct xfs_bmap_free *flist,
+                                       xfs_extlen_t total);
+typedef int    (*xfs_dir_lookup_t)(struct xfs_trans *tp,
+                                   struct xfs_inode *dp,
+                                   char *name,
+                                   int namelen,
+                                   xfs_ino_t *inum);
+typedef int    (*xfs_dir_removename_t)(struct xfs_trans *tp,
+                                       struct xfs_inode *dp,
+                                       char *name,
+                                       int namelen,
+                                       xfs_ino_t ino,
+                                       xfs_fsblock_t *first,
+                                       struct xfs_bmap_free *flist,
+                                       xfs_extlen_t total);
+typedef int    (*xfs_dir_getdents_t)(struct xfs_trans *tp,
+                                     struct xfs_inode *dp,
+                                     struct uio *uio,
+                                     int *eofp);
+typedef int    (*xfs_dir_replace_t)(struct xfs_trans *tp,
+                                    struct xfs_inode *dp,
+                                    char *name,
+                                    int namelen,
+                                    xfs_ino_t inum,
+                                    xfs_fsblock_t *first,
+                                    struct xfs_bmap_free *flist,
+                                    xfs_extlen_t total);
+typedef int    (*xfs_dir_canenter_t)(struct xfs_trans *tp,
+                                     struct xfs_inode *dp,
+                                     char *name,
+                                     int namelen);
+typedef int    (*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp,
+                                                      struct xfs_dinode *dip);
+typedef int    (*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args);
+
+typedef struct xfs_dirops {
+       xfs_dir_mount_t                         xd_mount;
+       xfs_dir_isempty_t                       xd_isempty;
+       xfs_dir_init_t                          xd_init;
+       xfs_dir_createname_t                    xd_createname;
+       xfs_dir_lookup_t                        xd_lookup;
+       xfs_dir_removename_t                    xd_removename;
+       xfs_dir_getdents_t                      xd_getdents;
+       xfs_dir_replace_t                       xd_replace;
+       xfs_dir_canenter_t                      xd_canenter;
+       xfs_dir_shortform_validate_ondisk_t     xd_shortform_validate_ondisk;
+       xfs_dir_shortform_to_single_t           xd_shortform_to_single;
+} xfs_dirops_t;
+
+/*
+ * Overall external interface routines.
+ */
+void   xfs_dir_startup(void);  /* called exactly once */
+
+#define        XFS_DIR_MOUNT(mp)       \
+       ((mp)->m_dirops.xd_mount(mp))
+#define        XFS_DIR_ISEMPTY(mp,dp)  \
+       ((mp)->m_dirops.xd_isempty(dp))
+#define        XFS_DIR_INIT(mp,tp,dp,pdp)      \
+       ((mp)->m_dirops.xd_init(tp,dp,pdp))
+#define        XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \
+       ((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\
+                                     total))
+#define        XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum)      \
+       ((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum))
+#define        XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total) \
+       ((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total))
+#define        XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp)     \
+       ((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp))
+#define        XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total)   \
+       ((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total))
+#define        XFS_DIR_CANENTER(mp,tp,dp,name,namelen) \
+       ((mp)->m_dirops.xd_canenter(tp,dp,name,namelen))
+#define        XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip)       \
+       ((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip))
+#define        XFS_DIR_SHORTFORM_TO_SINGLE(mp,args)    \
+       ((mp)->m_dirops.xd_shortform_to_single(args))
+
+#define        XFS_DIR_IS_V1(mp)       ((mp)->m_dirversion == 1)
+extern xfs_dirops_t xfsv1_dirops;
+
+#endif /* __XFS_DIR_H__ */
diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h
new file mode 100644 (file)
index 0000000..f723933
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_H__
+#define        __XFS_DIR2_H__
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_put_args;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Directory version 2.
+ * There are 4 possible formats:
+ *     shortform
+ *     single block - data with embedded leaf at the end
+ *     multiple data blocks, single leaf+freeindex block
+ *     data blocks, node&leaf blocks (btree), freeindex blocks
+ *
+ *     The shortform format is in xfs_dir2_sf.h.
+ *     The single block format is in xfs_dir2_block.h.
+ *     The data block format is in xfs_dir2_data.h.
+ *     The leaf and freeindex block formats are in xfs_dir2_leaf.h.
+ *     Node blocks are the same as the other version, in xfs_da_btree.h.
+ */
+
+/*
+ * Byte offset in data block and shortform entry.
+ */
+typedef        __uint16_t      xfs_dir2_data_off_t;
+#define        NULLDATAOFF     0xffffU
+typedef uint           xfs_dir2_data_aoff_t;   /* argument form */
+
+/*
+ * Directory block number (logical dirblk in file)
+ */
+typedef        __uint32_t      xfs_dir2_db_t;
+
+/*
+ * Byte offset in a directory.
+ */
+typedef        xfs_off_t               xfs_dir2_off_t;
+
+/*
+ * For getdents, argument struct for put routines.
+ */
+typedef int (*xfs_dir2_put_t)(struct xfs_dir2_put_args *pa);
+typedef struct xfs_dir2_put_args {
+       xfs_off_t               cook;           /* cookie of (next) entry */
+       xfs_intino_t    ino;            /* inode number */
+       struct dirent   *dbp;           /* buffer pointer */
+       char            *name;          /* directory entry name */
+       int             namelen;        /* length of name */
+       int             done;           /* output: set if value was stored */
+       xfs_dir2_put_t  put;            /* put function ptr (i/o) */
+       struct uio      *uio;           /* uio control structure */
+       unsigned char   type;           /* file type (see include/linux/fs.h) */
+} xfs_dir2_put_args_t;
+
+#define        XFS_DIR_IS_V2(mp)       ((mp)->m_dirversion == 2)
+extern xfs_dirops_t    xfsv2_dirops;
+
+/*
+ * Other interfaces used by the rest of the dir v2 code.
+ */
+extern int
+       xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+                           xfs_dir2_db_t *dbp);
+
+extern int
+       xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
+
+extern int
+       xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *vp);
+
+extern int
+       xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+                             struct xfs_dabuf *bp);
+
+#endif /* __XFS_DIR2_H__ */
diff --git a/include/xfs_dir2_block.h b/include/xfs_dir2_block.h
new file mode 100644 (file)
index 0000000..049f598
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_BLOCK_H__
+#define        __XFS_DIR2_BLOCK_H__
+
+/*
+ * xfs_dir2_block.h
+ * Directory version 2, single block format structures
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_data_hdr;
+struct xfs_dir2_leaf_entry;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The single block format is as follows:
+ * xfs_dir2_data_hdr_t structure
+ * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
+ * xfs_dir2_leaf_entry_t structures
+ * xfs_dir2_block_tail_t structure
+ */
+
+#define        XFS_DIR2_BLOCK_MAGIC    0x58443242      /* XD2B: for one block dirs */
+
+typedef struct xfs_dir2_block_tail {
+       __uint32_t      count;                  /* count of leaf entries */
+       __uint32_t      stale;                  /* count of stale lf entries */
+} xfs_dir2_block_tail_t;
+
+/*
+ * Generic single-block structure, for xfs_db (u and leaf repeat in a block).
+ */
+typedef struct xfs_dir2_block {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_BLOCK_MAGIC */
+       xfs_dir2_data_union_t   u[1];           /* data and unused entries */
+       xfs_dir2_leaf_entry_t   leaf[1];        /* leaf entries, before tail */
+       xfs_dir2_block_tail_t   tail;           /* at end of the dir block */
+} xfs_dir2_block_t;
+
+/*
+ * Pointer to the block tail at the very end of a data block (1-block format)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_TAIL_P)
+xfs_dir2_block_tail_t *
+xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block);
+#define        XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block) 
+#else
+#define        XFS_DIR2_BLOCK_TAIL_P(mp,block) \
+       (((xfs_dir2_block_tail_t *)((char *)(block) + (mp)->m_dirblksize)) - 1)
+#endif
+
+/*
+ * Pointer to the first leaf entry; the entries end at the tail (1-block format)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_LEAF_P)
+struct xfs_dir2_leaf_entry *xfs_dir2_block_leaf_p_arch(
+        xfs_dir2_block_tail_t *btp, xfs_arch_t arch);
+#define        XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch) \
+        xfs_dir2_block_leaf_p_arch(btp,arch)
+#else
+#define        XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch)    \
+       (((struct xfs_dir2_leaf_entry *)(btp)) - INT_GET((btp)->count, arch))
+#endif
+
+/*
+ * Function declarations.
+ */
+
+extern int
+       xfs_dir2_block_addname(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_block_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+                               struct uio *uio, int *eofp, struct dirent *dbp,
+                               xfs_dir2_put_t put);
+
+extern int
+       xfs_dir2_block_lookup(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_block_removename(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_block_replace(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_leaf_to_block(struct xfs_da_args *args, struct xfs_dabuf *lbp,
+                              struct xfs_dabuf *dbp);
+
+extern int
+       xfs_dir2_sf_to_block(struct xfs_da_args *args);
+
+#endif /* __XFS_DIR2_BLOCK_H__ */
diff --git a/include/xfs_dir2_data.h b/include/xfs_dir2_data.h
new file mode 100644 (file)
index 0000000..96c850d
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_DATA_H__
+#define        __XFS_DIR2_DATA_H__
+
+/*
+ * Directory format 2, data block structures.
+ */
+
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+#define        XFS_DIR2_DATA_MAGIC     0x58443244      /* XD2D: for multiblock dirs */
+#define        XFS_DIR2_DATA_ALIGN_LOG 3               /* i.e., 8 bytes */
+#define        XFS_DIR2_DATA_ALIGN     (1 << XFS_DIR2_DATA_ALIGN_LOG)
+#define        XFS_DIR2_DATA_FREE_TAG  0xffff
+#define        XFS_DIR2_DATA_FD_COUNT  3
+
+/*
+ * The directory address space is divided into sections ("spaces"),
+ * each starting 32GB (XFS_DIR2_SPACE_SIZE bytes) after the previous one.
+ */
+#define        XFS_DIR2_SPACE_SIZE     (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
+#define        XFS_DIR2_DATA_SPACE     0
+#define        XFS_DIR2_DATA_OFFSET    (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_DATA_FIRSTDB(mp)       \
+       XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET)
+
+/*
+ * Offsets of ., .. and the first entry after them in data space (block 0)
+ */
+#define        XFS_DIR2_DATA_DOT_OFFSET        \
+       ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
+#define        XFS_DIR2_DATA_DOTDOT_OFFSET     \
+       (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1))
+#define        XFS_DIR2_DATA_FIRST_OFFSET              \
+       (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2))
+
+/*
+ * Structures.
+ */
+
+/*
+ * Describe a free area in the data block.
+ * The freespace will be formatted as a xfs_dir2_data_unused_t.
+ */
+typedef struct xfs_dir2_data_free {
+       xfs_dir2_data_off_t     offset;         /* start of freespace */
+       xfs_dir2_data_off_t     length;         /* length of freespace */
+} xfs_dir2_data_free_t;
+
+/*
+ * Header for the data blocks.
+ * Always at the beginning of a directory-sized block.
+ * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
+ */
+typedef struct xfs_dir2_data_hdr {
+       __uint32_t              magic;          /* XFS_DIR2_DATA_MAGIC */
+                                               /* or XFS_DIR2_BLOCK_MAGIC */
+       xfs_dir2_data_free_t    bestfree[XFS_DIR2_DATA_FD_COUNT];
+} xfs_dir2_data_hdr_t;
+
+/*
+ * Active entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes.
+ */
+typedef struct xfs_dir2_data_entry {
+       xfs_ino_t               inumber;        /* inode number */
+       __uint8_t               namelen;        /* name length */
+       __uint8_t               name[1];        /* name bytes, no null */
+                                               /* variable offset */
+       xfs_dir2_data_off_t     tag;            /* starting offset of us */
+} xfs_dir2_data_entry_t;
+
+/*
+ * Unused entry in a data block.  Aligned to 8 bytes.
+ * Tag appears as the last 2 bytes.
+ */
+typedef struct xfs_dir2_data_unused {
+       __uint16_t              freetag;        /* XFS_DIR2_DATA_FREE_TAG */
+       xfs_dir2_data_off_t     length;         /* total free length */
+                                               /* variable offset */
+       xfs_dir2_data_off_t     tag;            /* starting offset of us */
+} xfs_dir2_data_unused_t;
+
+typedef union {
+       xfs_dir2_data_entry_t   entry;
+       xfs_dir2_data_unused_t  unused;
+} xfs_dir2_data_union_t;
+
+/*
+ * Generic data block structure, for xfs_db.
+ */
+typedef struct xfs_dir2_data {
+       xfs_dir2_data_hdr_t     hdr;            /* magic XFS_DIR2_DATA_MAGIC */
+       xfs_dir2_data_union_t   u[1];
+} xfs_dir2_data_t;
+
+/*
+ * Macros.
+ */
+
+/*
+ * Size of a data entry.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTSIZE)
+int xfs_dir2_data_entsize(int n);
+#define XFS_DIR2_DATA_ENTSIZE(n)       xfs_dir2_data_entsize(n)
+#else
+#define        XFS_DIR2_DATA_ENTSIZE(n)        \
+       ((int)(roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
+                (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN)))
+#endif
+
+/*
+ * Pointer to an entry's tag word.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P)
+xfs_dir2_data_off_t *xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep);
+#define        XFS_DIR2_DATA_ENTRY_TAG_P(dep)  xfs_dir2_data_entry_tag_p(dep)
+#else
+#define        XFS_DIR2_DATA_ENTRY_TAG_P(dep)  \
+       ((xfs_dir2_data_off_t *)\
+        ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \
+         (uint)sizeof(xfs_dir2_data_off_t)))
+#endif
+
+/*
+ * Pointer to a freespace's tag word.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P)
+xfs_dir2_data_off_t *xfs_dir2_data_unused_tag_p_arch(
+        xfs_dir2_data_unused_t *dup, xfs_arch_t arch);
+#define        XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch) \
+        xfs_dir2_data_unused_tag_p_arch(dup,arch)
+#else
+#define        XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch)       \
+       ((xfs_dir2_data_off_t *)\
+        ((char *)(dup) + INT_GET((dup)->length, arch) \
+                        - (uint)sizeof(xfs_dir2_data_off_t)))
+#endif
+
+/*
+ * Function declarations.
+ */
+
+#ifdef DEBUG
+extern void
+       xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+#else
+#define        xfs_dir2_data_check(dp,bp)
+#endif
+
+extern xfs_dir2_data_free_t *
+       xfs_dir2_data_freefind(xfs_dir2_data_t *d,
+                              xfs_dir2_data_unused_t *dup);
+
+extern xfs_dir2_data_free_t *
+       xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
+                                xfs_dir2_data_unused_t *dup, int *loghead);
+
+extern void
+       xfs_dir2_data_freeremove(xfs_dir2_data_t *d,
+                                xfs_dir2_data_free_t *dfp, int *loghead);
+
+extern void
+       xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
+                              int *loghead, char *aendp);
+
+extern int
+       xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+                          struct xfs_dabuf **bpp);
+
+extern void
+       xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                               xfs_dir2_data_entry_t *dep);
+
+extern void
+       xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern void
+       xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                                xfs_dir2_data_unused_t *dup);
+
+extern void
+       xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                               xfs_dir2_data_aoff_t offset,
+                               xfs_dir2_data_aoff_t len, int *needlogp,
+                               int *needscanp);
+
+extern void
+       xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                              xfs_dir2_data_unused_t *dup,
+                              xfs_dir2_data_aoff_t offset,
+                              xfs_dir2_data_aoff_t len, int *needlogp,
+                              int *needscanp);
+
+#endif /* __XFS_DIR2_DATA_H__ */
diff --git a/include/xfs_dir2_leaf.h b/include/xfs_dir2_leaf.h
new file mode 100644 (file)
index 0000000..f7ef396
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_LEAF_H__
+#define        __XFS_DIR2_LEAF_H__
+
+/*
+ * Directory version 2, leaf block structures.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+
+/*
+ * Offset of the leaf/node space.  First block in this space
+ * is the btree root.
+ */
+#define        XFS_DIR2_LEAF_SPACE     1
+#define        XFS_DIR2_LEAF_OFFSET    (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_LEAF_FIRSTDB(mp)       \
+       XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET)
+
+/*
+ * Types.
+ */
+
+/*
+ * Offset in data space of a data entry.
+ */
+typedef        __uint32_t      xfs_dir2_dataptr_t;
+#define        XFS_DIR2_MAX_DATAPTR    ((xfs_dir2_dataptr_t)0x7fffffff)
+#define        XFS_DIR2_NULL_DATAPTR   ((xfs_dir2_dataptr_t)0)
+
+/*
+ * Structures.
+ */
+
+/*
+ * Leaf block header.
+ */
+typedef struct xfs_dir2_leaf_hdr {
+       xfs_da_blkinfo_t        info;           /* header for da routines */
+       __uint16_t              count;          /* count of entries */
+       __uint16_t              stale;          /* count of stale entries */
+} xfs_dir2_leaf_hdr_t;
+
+/*
+ * Leaf block entry.
+ */
+typedef struct xfs_dir2_leaf_entry {
+       xfs_dahash_t            hashval;        /* hash value of name */
+       xfs_dir2_dataptr_t      address;        /* address of data entry */
+} xfs_dir2_leaf_entry_t;
+
+/*
+ * Leaf block tail.
+ */
+typedef struct xfs_dir2_leaf_tail {
+       __uint32_t              bestcount;
+} xfs_dir2_leaf_tail_t;
+
+/*
+ * Leaf block.
+ * bests and tail are at the end of the block for single-leaf only
+ * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
+ */
+typedef struct xfs_dir2_leaf {
+       xfs_dir2_leaf_hdr_t     hdr;            /* leaf header */
+       xfs_dir2_leaf_entry_t   ents[1];        /* entries */
+                                               /* ... */
+       xfs_dir2_data_off_t     bests[1];       /* best free counts */
+       xfs_dir2_leaf_tail_t    tail;           /* leaf tail */
+} xfs_dir2_leaf_t;
+
+/*
+ * Macros.
+ * The DB blocks are logical directory block numbers, not filesystem blocks.
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_MAX_LEAF_ENTS)
+int
+xfs_dir2_max_leaf_ents(struct xfs_mount *mp);
+#define        XFS_DIR2_MAX_LEAF_ENTS(mp)      \
+       xfs_dir2_max_leaf_ents(mp)
+#else
+#define        XFS_DIR2_MAX_LEAF_ENTS(mp)      \
+       ((int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / \
+              (uint)sizeof(xfs_dir2_leaf_entry_t)))
+#endif
+
+/*
+ * Get address of the tail (bestcount) at the end of the single-leaf block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_TAIL_P)
+xfs_dir2_leaf_tail_t *
+xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp);
+#define        XFS_DIR2_LEAF_TAIL_P(mp,lp)     \
+       xfs_dir2_leaf_tail_p(mp, lp)
+#else
+#define        XFS_DIR2_LEAF_TAIL_P(mp,lp)     \
+       ((xfs_dir2_leaf_tail_t *)\
+        ((char *)(lp) + (mp)->m_dirblksize - \
+         (uint)sizeof(xfs_dir2_leaf_tail_t)))
+#endif
+
+/*
+ * Get address of the bests array in the single-leaf block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_BESTS_P)
+xfs_dir2_data_off_t *
+xfs_dir2_leaf_bests_p_arch(xfs_dir2_leaf_tail_t *ltp, xfs_arch_t arch);
+#define        XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch)    xfs_dir2_leaf_bests_p_arch(ltp,arch)
+#else
+#define        XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch)    \
+       ((xfs_dir2_data_off_t *)(ltp) - INT_GET((ltp)->bestcount, arch))
+#endif
+
+/*
+ * Convert dataptr to byte in file space
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE)
+xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define        XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp)
+#else
+#define        XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) \
+       ((xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG)
+#endif
+
+/*
+ * Convert byte in file space to dataptr.  It had better be aligned.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR)
+xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define        XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by)
+#else
+#define        XFS_DIR2_BYTE_TO_DATAPTR(mp,by) \
+       ((xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG))
+#endif
+
+/*
+ * Convert dataptr to a block number
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_DB)
+xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define        XFS_DIR2_DATAPTR_TO_DB(mp,dp)   xfs_dir2_dataptr_to_db(mp, dp)
+#else
+#define        XFS_DIR2_DATAPTR_TO_DB(mp,dp)   \
+       XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
+#endif
+
+/*
+ * Convert dataptr to a byte offset in a block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_OFF)
+xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp);
+#define        XFS_DIR2_DATAPTR_TO_OFF(mp,dp)  xfs_dir2_dataptr_to_off(mp, dp)
+#else
+#define        XFS_DIR2_DATAPTR_TO_OFF(mp,dp)  \
+       XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp))
+#endif
+
+/*
+ * Convert block and offset to byte in space
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_BYTE)
+xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
+                       xfs_dir2_data_aoff_t o);
+#define        XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)        \
+       xfs_dir2_db_off_to_byte(mp, db, o)
+#else
+#define        XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o)        \
+       (((xfs_dir2_off_t)(db) << \
+        ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o))
+#endif
+
+/*
+ * Convert byte in space to (DB) block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DB)
+xfs_dir2_db_t xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define        XFS_DIR2_BYTE_TO_DB(mp,by)      xfs_dir2_byte_to_db(mp, by)
+#else
+#define        XFS_DIR2_BYTE_TO_DB(mp,by)      \
+       ((xfs_dir2_db_t)((by) >> \
+                        ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)))
+#endif
+
+/*
+ * Convert byte in space to (DA) block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DA)
+xfs_dablk_t xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define        XFS_DIR2_BYTE_TO_DA(mp,by)      xfs_dir2_byte_to_da(mp, by)
+#else
+#define        XFS_DIR2_BYTE_TO_DA(mp,by)      \
+       XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by))
+#endif
+
+/*
+ * Convert byte in space to offset in a block
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_OFF)
+xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by);
+#define        XFS_DIR2_BYTE_TO_OFF(mp,by)     xfs_dir2_byte_to_off(mp, by)
+#else
+#define        XFS_DIR2_BYTE_TO_OFF(mp,by)     \
+       ((xfs_dir2_data_aoff_t)((by) & \
+                               ((1 << ((mp)->m_sb.sb_blocklog + \
+                                       (mp)->m_sb.sb_dirblklog)) - 1)))
+#endif
+
+/*
+ * Convert block and offset to dataptr
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_DATAPTR)
+xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
+                          xfs_dir2_data_aoff_t o);
+#define        XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)     \
+       xfs_dir2_db_off_to_dataptr(mp, db, o)
+#else
+#define        XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o)     \
+       XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o))
+#endif
+
+/*
+ * Convert block (DB) to block (dablk)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_DA)
+xfs_dablk_t xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define        XFS_DIR2_DB_TO_DA(mp,db)        xfs_dir2_db_to_da(mp, db)
+#else
+#define        XFS_DIR2_DB_TO_DA(mp,db)        \
+       ((xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog))
+#endif
+
+/*
+ * Convert block (dablk) to block (DB)
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_DB)
+xfs_dir2_db_t xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da);
+#define        XFS_DIR2_DA_TO_DB(mp,da)        xfs_dir2_da_to_db(mp, da)
+#else
+#define        XFS_DIR2_DA_TO_DB(mp,da)        \
+       ((xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog))
+#endif
+
+/*
+ * Convert block (dablk) to byte offset in space
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_BYTE)
+xfs_dir2_off_t xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da);
+#define XFS_DIR2_DA_TO_BYTE(mp,da)     xfs_dir2_da_to_byte(mp, da)
+#else
+#define        XFS_DIR2_DA_TO_BYTE(mp,da)      \
+       XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0)
+#endif
+
+/*
+ * Function declarations.
+ */
+
+extern int
+       xfs_dir2_block_to_leaf(struct xfs_da_args *args, struct xfs_dabuf *dbp);
+
+extern int
+       xfs_dir2_leaf_addname(struct xfs_da_args *args);
+
+extern void
+       xfs_dir2_leaf_compact(struct xfs_da_args *args, struct xfs_dabuf *bp);
+
+extern void
+       xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+                                int *lowstalep, int *highstalep, int *lowlogp,
+                                int *highlogp);
+
+extern int
+       xfs_dir2_leaf_getdents(struct xfs_trans *tp, struct xfs_inode *dp,
+                              struct uio *uio, int *eofp, struct dirent *dbp,
+                              xfs_dir2_put_t put);
+
+extern int
+       xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
+                          struct xfs_dabuf **bpp, int magic);
+
+extern void
+       xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                              int first, int last);
+
+extern void
+       xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                               int first, int last);
+
+extern void
+       xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern void
+       xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+
+extern int
+       xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_leaf_removename(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_leaf_replace(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+                                 struct xfs_dabuf *lbp);
+extern int
+       xfs_dir2_leaf_trim_data(struct xfs_da_args *args, struct xfs_dabuf *lbp,                                xfs_dir2_db_t db);
+
+extern int
+       xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+#endif /* __XFS_DIR2_LEAF_H__ */
diff --git a/include/xfs_dir2_node.h b/include/xfs_dir2_node.h
new file mode 100644 (file)
index 0000000..4ec4d1e
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_NODE_H__
+#define        __XFS_DIR2_NODE_H__
+
+/*
+ * Directory version 2, btree node format structures
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_inode;
+struct xfs_trans;
+
+/*
+ * Constants.
+ */
+
+/*
+ * Offset of the freespace index.
+ */
+#define        XFS_DIR2_FREE_SPACE     2
+#define        XFS_DIR2_FREE_OFFSET    (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
+#define        XFS_DIR2_FREE_FIRSTDB(mp)       \
+       XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET)
+
+#define        XFS_DIR2_FREE_MAGIC     0x58443246      /* XD2F */
+
+/*
+ * Structures.
+ */
+typedef        struct xfs_dir2_free_hdr {
+       __uint32_t              magic;          /* XFS_DIR2_FREE_MAGIC */
+       __int32_t               firstdb;        /* db of first entry */
+       __int32_t               nvalid;         /* count of valid entries */
+       __int32_t               nused;          /* count of used entries */
+} xfs_dir2_free_hdr_t;
+
+typedef struct xfs_dir2_free {
+       xfs_dir2_free_hdr_t     hdr;            /* block header */
+       xfs_dir2_data_off_t     bests[1];       /* best free counts */
+                                               /* unused entries are -1 */
+} xfs_dir2_free_t;
+#define        XFS_DIR2_MAX_FREE_BESTS(mp)     \
+       (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
+        (uint)sizeof(xfs_dir2_data_off_t))
+
+/*
+ * Macros.
+ */
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDB)
+xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define        XFS_DIR2_DB_TO_FDB(mp,db)       xfs_dir2_db_to_fdb(mp, db)
+#else
+#define        XFS_DIR2_DB_TO_FDB(mp,db)       \
+       (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp))
+#endif
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDINDEX)
+int
+xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db);
+#define        XFS_DIR2_DB_TO_FDINDEX(mp,db)   xfs_dir2_db_to_fdindex(mp, db)
+#else
+#define        XFS_DIR2_DB_TO_FDINDEX(mp,db)   ((db) % XFS_DIR2_MAX_FREE_BESTS(mp))
+#endif
+
+/* 
+ * Functions.
+ */
+
+extern void
+       xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+                               int first, int last);
+
+extern int
+       xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp);
+
+extern xfs_dahash_t
+       xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
+
+extern int
+       xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+                                 struct xfs_da_args *args, int *indexp,
+                                 struct xfs_da_state *state);
+
+extern int
+       xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
+                            struct xfs_dabuf *leaf2_bp);
+
+extern int
+       xfs_dir2_leafn_split(struct xfs_da_state *state,
+                            struct xfs_da_state_blk *oldblk,
+                            struct xfs_da_state_blk *newblk);
+
+extern int
+       xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+
+extern void
+       xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+                                struct xfs_da_state_blk *drop_blk,
+                                struct xfs_da_state_blk *save_blk);
+
+extern int
+       xfs_dir2_node_addname(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_node_lookup(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_node_removename(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_node_replace(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+                               int *rvalp);
+
+#endif /* __XFS_DIR2_NODE_H__ */
diff --git a/include/xfs_dir2_sf.h b/include/xfs_dir2_sf.h
new file mode 100644 (file)
index 0000000..b74dd75
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR2_SF_H__
+#define        __XFS_DIR2_SF_H__
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to
+ * fit into the literal area of the inode.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_dir2_block;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Maximum size of a shortform directory.
+ */
+#define        XFS_DIR2_SF_MAX_SIZE    \
+       (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \
+        (uint)sizeof(xfs_agino_t))
+
+/*
+ * Inode number stored as 8 8-bit values.
+ */
+typedef        struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
+
+#define        XFS_DIR2_SF_GET_INO8_ARCH(di,arch)      \
+       (xfs_ino_t)(DIRINO_GET_ARCH(&di,arch))
+#define        XFS_DIR2_SF_GET_INO8(di)                \
+        XFS_DIR2_SF_GET_INO8_ARCH(di,ARCH_NOCONVERT)
+
+/*
+ * Inode number stored as 4 8-bit values.
+ * Works a lot of the time, when all the inode numbers in a directory
+ * fit in 32 bits.
+ */
+typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
+#define        XFS_DIR2_SF_GET_INO4_ARCH(di,arch)      \
+       (xfs_ino_t)(DIRINO4_GET_ARCH(&di,arch))
+#define        XFS_DIR2_SF_GET_INO4(di)                \
+       XFS_DIR2_SF_GET_INO4_ARCH(di,ARCH_NOCONVERT)
+
+typedef union {
+       xfs_dir2_ino8_t i8;
+       xfs_dir2_ino4_t i4;
+} xfs_dir2_inou_t;
+#define        XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
+
+/*
+ * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
+ * Only need 16 bits, this is the byte offset into the single block form.
+ */
+typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+
+/*
+ * The parent directory has a dedicated field, and the self-pointer must
+ * be calculated on the fly.
+ *
+ * Entries are packed toward the top as tightly as possible.  The header
+ * and the elements must be bcopy()'d out into a work area to get correct
+ * alignment for the inode number fields.
+ */
+typedef struct xfs_dir2_sf_hdr {
+       __uint8_t               count;          /* count of entries */
+       __uint8_t               i8count;        /* count of 8-byte inode #s */
+       xfs_dir2_inou_t         parent;         /* parent dir inode number */
+} xfs_dir2_sf_hdr_t;
+
+typedef struct xfs_dir2_sf_entry {
+       __uint8_t               namelen;        /* actual name length */
+       xfs_dir2_sf_off_t       offset;         /* saved offset */
+       __uint8_t               name[1];        /* name, variable size */
+       xfs_dir2_inou_t         inumber;        /* inode number, var. offset */
+} xfs_dir2_sf_entry_t;
+
+typedef struct xfs_dir2_sf {
+       xfs_dir2_sf_hdr_t       hdr;            /* shortform header */
+       xfs_dir2_sf_entry_t     list[1];        /* shortform entries */
+} xfs_dir2_sf_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_HDR_SIZE)
+int xfs_dir2_sf_hdr_size(int i8count);
+#define        XFS_DIR2_SF_HDR_SIZE(i8count)   xfs_dir2_sf_hdr_size(i8count)
+#else
+#define        XFS_DIR2_SF_HDR_SIZE(i8count)   \
+       ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
+        ((i8count) == 0) * \
+        ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_INUMBERP)
+xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep);
+#define        XFS_DIR2_SF_INUMBERP(sfep)      xfs_dir2_sf_inumberp(sfep)
+#else
+#define        XFS_DIR2_SF_INUMBERP(sfep)      \
+       ((xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen])
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_INUMBER)
+xfs_intino_t xfs_dir2_sf_get_inumber_arch(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from, 
+                                            xfs_arch_t arch);
+#define        XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch)   \
+       xfs_dir2_sf_get_inumber_arch(sfp, from, arch)
+
+#else
+#define        XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch)   \
+       ((sfp)->hdr.i8count == 0 ? \
+               (xfs_intino_t)XFS_DIR2_SF_GET_INO4_ARCH(*(from), arch) : \
+               (xfs_intino_t)XFS_DIR2_SF_GET_INO8_ARCH(*(from), arch))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_INUMBER)
+void xfs_dir2_sf_put_inumber_arch(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
+                                    xfs_dir2_inou_t *to, xfs_arch_t arch);
+#define        XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch)  \
+       xfs_dir2_sf_put_inumber_arch(sfp,from,to,arch)
+#else  /* macro form: do/while(0) makes it one statement, like the call above */
+#define        XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch)  \
+       do { if ((sfp)->hdr.i8count == 0) { /* 4-byte inode form */ \
+            DIRINO4_COPY_ARCH(from,to,arch); \
+        } else { /* 8-byte inode form */ \
+            DIRINO_COPY_ARCH(from,to,arch); \
+        } } while (0)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_OFFSET)
+xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset_arch(xfs_dir2_sf_entry_t *sfep, 
+                                                    xfs_arch_t arch);
+xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep);
+#define        XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch)  \
+        xfs_dir2_sf_get_offset_arch(sfep,arch)
+#else
+#define        XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch)  \
+        INT_GET_UNALIGNED_16_ARCH(&(sfep)->offset.i,arch)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_OFFSET)
+void xfs_dir2_sf_put_offset_arch(xfs_dir2_sf_entry_t *sfep,
+                                   xfs_dir2_data_aoff_t off, xfs_arch_t arch);
+#define        XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch) \
+        xfs_dir2_sf_put_offset_arch(sfep,off,arch)
+#else
+#define        XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch)      \
+        INT_SET_UNALIGNED_16_ARCH(&(sfep)->offset.i,off,arch)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYNAME)
+int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len);
+#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)    \
+       xfs_dir2_sf_entsize_byname(sfp,len)
+#else
+#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len)    /* space a name uses */ \
+       ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
+        ((sfp)->hdr.i8count == 0) * \
+        ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYENTRY)
+int xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep);
+#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)  \
+       xfs_dir2_sf_entsize_byentry(sfp,sfep)
+#else
+#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)  /* space an entry uses */ \
+       ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
+        ((sfp)->hdr.i8count == 0) * \
+        ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_FIRSTENTRY)
+xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp);
+#define XFS_DIR2_SF_FIRSTENTRY(sfp)    xfs_dir2_sf_firstentry(sfp)
+#else  /* macro form: header size depends on hdr.i8count */
+#define XFS_DIR2_SF_FIRSTENTRY(sfp)    /* first entry in struct */ \
+       ((xfs_dir2_sf_entry_t *) \
+        ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE((sfp)->hdr.i8count)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_NEXTENTRY)
+xfs_dir2_sf_entry_t *xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp,
+                                          xfs_dir2_sf_entry_t *sfep);
+#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)                xfs_dir2_sf_nextentry(sfp,sfep)
+#else
+#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep)                /* next entry in struct */ \
+       ((xfs_dir2_sf_entry_t *) \
+               ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep)))
+#endif
+
+/*
+ * Functions.
+ */
+
+extern int
+       xfs_dir2_block_sfsize(struct xfs_inode *dp,
+                             struct xfs_dir2_block *block,
+                             xfs_dir2_sf_hdr_t *sfhp);
+
+extern int
+       xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+                            int size, xfs_dir2_sf_hdr_t *sfhp);
+
+extern int
+       xfs_dir2_sf_addname(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+
+extern int
+       xfs_dir2_sf_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
+                            struct dirent *dbp, xfs_dir2_put_t put);
+
+extern int
+       xfs_dir2_sf_lookup(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_sf_removename(struct xfs_da_args *args);
+
+extern int
+       xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+#endif /* __XFS_DIR2_SF_H__ */
diff --git a/include/xfs_dir_leaf.h b/include/xfs_dir_leaf.h
new file mode 100644 (file)
index 0000000..d330ff8
--- /dev/null
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_LEAF_H__
+#define        __XFS_DIR_LEAF_H__
+
+/*
+ * Directory layout, internal structure, access macros, etc.
+ *
+ * Large directories are structured around Btrees where all the data
+ * elements are in the leaf nodes.  Filenames are hashed into an int,
+ * then that int is used as the index into the Btree.  Since the hashval
+ * of a filename may not be unique, we may have duplicate keys.  The
+ * internal links in the Btree are logical block offsets into the file.
+ */
+
+struct dirent;
+struct uio;
+struct xfs_bmap_free;
+struct xfs_dabuf;
+struct xfs_da_args;
+struct xfs_da_state;
+struct xfs_da_state_blk;
+struct xfs_dir_put_args;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*========================================================================
+ * Directory Structure when equal to XFS_LBSIZE(mp) bytes.
+ *========================================================================*/
+
+/*
+ * This is the structure of the leaf nodes in the Btree.
+ *
+ * Struct leaf_entry's are packed from the top.  Names grow from the bottom
+ * but are not packed.  The freemap contains run-length-encoded entries
+ * for the free bytes after the leaf_entry's, but only the N largest such,
+ * smaller runs are dropped.  When the freemap doesn't show enough space
+ * for an allocation, we compact the namelist area and try again.  If we
+ * still don't have enough space, then we have to split the block.
+ *
+ * Since we have duplicate hash keys, for each key that matches, compare
+ * the actual string.  The root and intermediate node search always takes
+ * the first-in-the-block key match found, so we should only have to work
+ * "forw"ard.  If none matches, continue with the "forw"ard leaf nodes
+ * until the hash key changes or the filename is found.
+ *
+ * The parent directory and the self-pointer are explicitly represented
+ * (ie: there are entries for "." and "..").
+ *
+ * Note that the count being a __uint16_t limits us to something like a 
+ * blocksize of 1.3MB in the face of worst case (short) filenames.
+ */
+#define XFS_DIR_LEAF_MAPSIZE   3       /* how many freespace slots */
+
+typedef struct xfs_dir_leafblock {
+       struct xfs_dir_leaf_hdr {       /* constant-structure header block */
+               xfs_da_blkinfo_t info;  /* block type, links, etc. */
+               __uint16_t count;       /* count of active leaf_entry's */
+               __uint16_t namebytes;   /* num bytes of name strings stored */
+               __uint16_t firstused;   /* first used byte in name area */
+               __uint8_t  holes;       /* != 0 if blk needs compaction */
+               __uint8_t  pad1;
+               struct xfs_dir_leaf_map {/* RLE map of free bytes */
+                       __uint16_t base; /* base of free region */
+                       __uint16_t size; /* run length of free region */
+               } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
+       } hdr;
+       struct xfs_dir_leaf_entry {     /* sorted on key, not name */
+               xfs_dahash_t hashval;   /* hash value of name */
+               __uint16_t nameidx;     /* index into buffer of name */
+               __uint8_t namelen;      /* length of name string */
+               __uint8_t pad2;
+       } entries[1];                   /* var sized array */
+       struct xfs_dir_leaf_name {
+               xfs_dir_ino_t inumber;  /* inode number for this key */
+               __uint8_t name[1];      /* name string itself */
+       } namelist[1];                  /* grows from bottom of buf */
+} xfs_dir_leafblock_t;
+typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
+typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
+typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
+typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
+
+/*
+ * Length of name for which a 512-byte block filesystem
+ * can get a double split.
+ */
+#define        XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN       \
+       (512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \
+        (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \
+        (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1)
+
+typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa);
+
+typedef union {
+       xfs_off_t               o;              /* offset (cookie) */
+       /*
+        * Watch the order here (endian-ness dependent).
+        */
+       struct {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               xfs_dahash_t    h;      /* hash value */
+               __uint32_t      be;     /* block and entry */
+#else  /* __BYTE_ORDER == __BIG_ENDIAN */
+               __uint32_t      be;     /* block and entry */
+               xfs_dahash_t    h;      /* hash value */
+#endif /* __BYTE_ORDER == __BIG_ENDIAN */
+       } s;
+} xfs_dircook_t;
+
+#define        XFS_PUT_COOKIE(c,mp,bno,entry,hash)     \
+       ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
+
+#define        XFS_GET_DIR_INO_ARCH(mp,di,arch) \
+    DIRINO_GET_ARCH(&(di),arch)
+#define        XFS_GET_DIR_INO(mp,di) \
+    XFS_GET_DIR_INO_ARCH(mp,di,ARCH_NOCONVERT)
+
+typedef struct xfs_dir_put_args
+{
+       xfs_dircook_t   cook;           /* cookie of (next) entry */
+       xfs_intino_t    ino;            /* inode number */
+       struct dirent   *dbp;           /* buffer pointer */
+       char            *name;          /* directory entry name */
+       int             namelen;        /* length of name */
+       int             done;           /* output: set if value was stored */
+       xfs_dir_put_t   put;            /* put function ptr (i/o) */
+       struct uio      *uio;           /* uio control structure */
+       unsigned char   type;           /* file type (see include/linux/fs.h) */
+} xfs_dir_put_args_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYNAME)
+int xfs_dir_leaf_entsize_byname(int len);
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)       xfs_dir_leaf_entsize_byname(len)
+#else  /* macro form: name struct minus its 1-byte name[] placeholder, plus len */
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)       /* space a name will use */ \
+       ((uint)sizeof(xfs_dir_leaf_name_t)-1 + (len))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYENTRY)
+int xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry);
+#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)    \
+       xfs_dir_leaf_entsize_byentry(entry)
+#else
+#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)    /* space an entry will use */ \
+       ((uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_NAMESTRUCT)
+xfs_dir_leaf_name_t *
+xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset);
+#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)  \
+       xfs_dir_leaf_namestruct(leafp,offset)
+#else
+#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset)  /* point to name struct */ \
+       ((xfs_dir_leaf_name_t *)&((char *)(leafp))[offset])
+#endif
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Internal routines when dirsize < XFS_LITINO(mp).
+ */
+int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent);
+int xfs_dir_shortform_addname(struct xfs_da_args *args);
+int xfs_dir_shortform_lookup(struct xfs_da_args *args);
+int xfs_dir_shortform_to_leaf(struct xfs_da_args *args);
+int xfs_dir_shortform_removename(struct xfs_da_args *args);
+int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp,
+                                     struct dirent *dbp, xfs_dir_put_t put);
+int xfs_dir_shortform_replace(struct xfs_da_args *args);
+
+/*
+ * Internal routines when dirsize == XFS_LBSIZE(mp).
+ */
+int xfs_dir_leaf_to_node(struct xfs_da_args *args);
+int xfs_dir_leaf_to_shortform(struct xfs_da_args *args);
+
+/*
+ * Routines used for growing the Btree.
+ */
+int    xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block,
+                                  struct xfs_dabuf **bpp);
+int    xfs_dir_leaf_split(struct xfs_da_state *state,
+                                 struct xfs_da_state_blk *oldblk,
+                                 struct xfs_da_state_blk *newblk);
+int    xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer,
+                               struct xfs_da_args *args, int insertion_index);
+int    xfs_dir_leaf_addname(struct xfs_da_args *args);
+int    xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer,
+                                      struct xfs_da_args *args,
+                                      int *index_found_at);
+int    xfs_dir_leaf_remove(struct xfs_trans *trans,
+                                  struct xfs_dabuf *leaf_buffer,
+                                  int index_to_remove);
+int    xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp,
+                                        xfs_dablk_t bno, struct uio *uio,
+                                        int *eobp, struct dirent *dbp,
+                                        xfs_dir_put_t put, xfs_daddr_t nextda);
+
+/*
+ * Routines used for shrinking the Btree.
+ */
+int    xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval);
+void   xfs_dir_leaf_unbalance(struct xfs_da_state *state,
+                                            struct xfs_da_state_blk *drop_blk,
+                                            struct xfs_da_state_blk *save_blk);
+
+/*
+ * Utility routines.
+ */
+uint   xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count);
+int    xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp,
+                                 struct xfs_dabuf *leaf2_bp);
+int    xfs_dir_put_dirent32_direct(xfs_dir_put_args_t *pa);
+int    xfs_dir_put_dirent32_uio(xfs_dir_put_args_t *pa);
+int    xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa);
+int    xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa);
+int    xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+
+
+/*
+ * Global data.
+ */
+extern xfs_dahash_t    xfs_dir_hash_dot, xfs_dir_hash_dotdot;
+
+#endif /* __XFS_DIR_LEAF_H__ */
diff --git a/include/xfs_dir_sf.h b/include/xfs_dir_sf.h
new file mode 100644 (file)
index 0000000..d875da4
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DIR_SF_H__
+#define        __XFS_DIR_SF_H__
+
+/*
+ * Directory layout when stored internal to an inode.
+ *
+ * Small directories are packed as tightly as possible so as to
+ * fit into the literal area of the inode.
+ */
+
+typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t;
+
+/*
+ * The parent directory has a dedicated field, and the self-pointer must
+ * be calculated on the fly.
+ *
+ * Entries are packed toward the top as tightly as possible.  The header
+ * and the elements must be bcopy()'d out into a work area to get correct
+ * alignment for the inode number fields.
+ */
+typedef struct xfs_dir_shortform {
+       struct xfs_dir_sf_hdr {         /* constant-structure header block */
+               xfs_dir_ino_t parent;   /* parent dir inode number */
+               __uint8_t count;        /* count of active entries */
+       } hdr;
+       struct xfs_dir_sf_entry {
+               xfs_dir_ino_t inumber;  /* referenced inode number */
+               __uint8_t namelen;      /* actual length of name (no NULL) */
+               __uint8_t name[1];      /* name */
+       } list[1];                      /* variable sized array */
+} xfs_dir_shortform_t;
+typedef struct xfs_dir_sf_hdr xfs_dir_sf_hdr_t;
+typedef struct xfs_dir_sf_entry xfs_dir_sf_entry_t;
+
+/*
+ * We generate this then sort it, so that readdirs are returned in
+ * hash-order.  Else seekdir won't work.
+ */
+typedef struct xfs_dir_sf_sort {
+       __uint8_t       entno;          /* .=0, ..=1, else entry# + 2 */
+       __uint8_t       seqno;          /* sequence # with same hash value */
+       __uint8_t       namelen;        /* length of name value (no null) */
+       xfs_dahash_t    hash;           /* this entry's hash value */
+       xfs_intino_t    ino;            /* this entry's inode number */
+       char            *name;          /* name value, pointer into buffer */
+} xfs_dir_sf_sort_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_GET_DIRINO)
+void xfs_dir_sf_get_dirino_arch(xfs_dir_ino_t *from, xfs_ino_t *to, xfs_arch_t arch);
+void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to);
+#define        XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch)    xfs_dir_sf_get_dirino_arch(from, to, arch)
+#define        XFS_DIR_SF_GET_DIRINO(from,to)              xfs_dir_sf_get_dirino(from, to)
+#else
+#define        XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch)    DIRINO_COPY_ARCH(from,to,arch)      
+#define        XFS_DIR_SF_GET_DIRINO(from,to)              DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_PUT_DIRINO)
+void xfs_dir_sf_put_dirino_arch(xfs_ino_t *from, xfs_dir_ino_t *to, xfs_arch_t arch);
+void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to);
+#define        XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch)    xfs_dir_sf_put_dirino_arch(from, to, arch)
+#define        XFS_DIR_SF_PUT_DIRINO(from,to)              xfs_dir_sf_put_dirino(from, to)
+#else
+#define        XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch)    DIRINO_COPY_ARCH(from,to,arch)
+#define        XFS_DIR_SF_PUT_DIRINO(from,to)              DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYNAME)
+int xfs_dir_sf_entsize_byname(int len);
+#define XFS_DIR_SF_ENTSIZE_BYNAME(len)         xfs_dir_sf_entsize_byname(len)
+#else
+#define XFS_DIR_SF_ENTSIZE_BYNAME(len)         /* space a name uses */ \
+       ((uint)sizeof(xfs_dir_sf_entry_t)-1 + (len))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYENTRY)
+int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep);
+#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)       xfs_dir_sf_entsize_byentry(sfep)
+#else
+#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)       /* space an entry uses */ \
+       ((uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_NEXTENTRY)
+xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep);
+#define XFS_DIR_SF_NEXTENTRY(sfep)             xfs_dir_sf_nextentry(sfep)
+#else
+#define XFS_DIR_SF_NEXTENTRY(sfep)             /* next entry in struct */ \
+       ((xfs_dir_sf_entry_t *) \
+               ((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ALLFIT)
+int xfs_dir_sf_allfit(int count, int totallen);
+#define XFS_DIR_SF_ALLFIT(count,totallen)      \
+       xfs_dir_sf_allfit(count,totallen)
+#else
+#define XFS_DIR_SF_ALLFIT(count,totallen)      /* will all entries fit? */ \
+       ((uint)sizeof(xfs_dir_sf_hdr_t) + \
+              ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen))
+#endif
+
+#ifdef XFS_ALL_TRACE
+#define        XFS_DIR_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_DIR_TRACE
+#endif
+
+/*
+ * Kernel tracing support for directories.
+ */
+struct uio;
+struct xfs_inode;
+struct xfs_da_intnode;
+struct xfs_dinode;
+struct xfs_dir_leafblock;
+struct xfs_dir_leaf_entry;
+
+#define        XFS_DIR_TRACE_SIZE      4096    /* size of global trace buffer */     
+
+/*
+ * Trace record types.
+ */
+#define        XFS_DIR_KTRACE_G_DU     1       /* dp, uio */
+#define        XFS_DIR_KTRACE_G_DUB    2       /* dp, uio, bno */
+#define        XFS_DIR_KTRACE_G_DUN    3       /* dp, uio, node */
+#define        XFS_DIR_KTRACE_G_DUL    4       /* dp, uio, leaf */
+#define        XFS_DIR_KTRACE_G_DUE    5       /* dp, uio, leaf entry */
+#define        XFS_DIR_KTRACE_G_DUC    6       /* dp, uio, cookie */
+
+#if defined(XFS_DIR_TRACE)
+
+void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio);
+void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio,
+                             xfs_dablk_t bno);
+void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio,
+                             struct xfs_da_intnode *node);
+void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio,
+                             struct xfs_dir_leafblock *leaf);
+void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio,
+                             struct xfs_dir_leaf_entry *entry);
+void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio,
+                             xfs_off_t cookie);
+void xfs_dir_trace_enter(int type, char *where,
+                            __psunsigned_t a0, __psunsigned_t a1,
+                            __psunsigned_t a2, __psunsigned_t a3,
+                            __psunsigned_t a4, __psunsigned_t a5,
+                            __psunsigned_t a6, __psunsigned_t a7,
+                            __psunsigned_t a8, __psunsigned_t a9,
+                            __psunsigned_t a10, __psunsigned_t a11);
+#else
+#define        xfs_dir_trace_g_du(w,d,u)
+#define        xfs_dir_trace_g_dub(w,d,u,b)
+#define        xfs_dir_trace_g_dun(w,d,u,n)
+#define        xfs_dir_trace_g_dul(w,d,u,l)
+#define        xfs_dir_trace_g_due(w,d,u,e)
+#define        xfs_dir_trace_g_duc(w,d,u,c)
+#endif /* XFS_DIR_TRACE */
+
+#endif /* __XFS_DIR_SF_H__ */
diff --git a/include/xfs_dqblk.h b/include/xfs_dqblk.h
new file mode 100644 (file)
index 0000000..ec1704d
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQBLK_H__
+#define __XFS_DQBLK_H__
+
+/*
+ * The ondisk form of a dquot structure.
+ */
+#define XFS_DQUOT_MAGIC                0x4451          /* 'DQ' */
+#define XFS_DQUOT_VERSION      (u_int8_t)0x01  /* latest version number */
+
+/* 
+ * This is the main portion of the on-disk representation of quota 
+ * information for a user. This is the q_core of the xfs_dquot_t that
+ * is kept in kernel memory. We pad this with some more expansion room
+ * to construct the on disk structure.
+ */
+typedef struct xfs_disk_dquot {
+/*16*/ u_int16_t       d_magic;        /* dquot magic = XFS_DQUOT_MAGIC */
+/*8 */ u_int8_t        d_version;      /* dquot version */
+/*8 */ u_int8_t        d_flags;        /* XFS_DQ_USER/XFS_DQ_PROJ */
+/*32*/ xfs_dqid_t      d_id;           /* user id or proj id */
+/*64*/ xfs_qcnt_t      d_blk_hardlimit;/* absolute limit on disk blks */
+/*64*/ xfs_qcnt_t      d_blk_softlimit;/* preferred limit on disk blks */
+/*64*/ xfs_qcnt_t      d_ino_hardlimit;/* maximum # allocated inodes */
+/*64*/ xfs_qcnt_t      d_ino_softlimit;/* preferred inode limit */
+/*64*/ xfs_qcnt_t      d_bcount;       /* disk blocks owned by the user */
+/*64*/ xfs_qcnt_t      d_icount;       /* inodes owned by the user */
+/*32*/ __int32_t       d_itimer;       /* zero if within inode limits; if not,
+                                          this is when we refuse service */
+/*32*/ __int32_t       d_btimer;       /* similar to above; for disk blocks */
+/*16*/ xfs_qwarncnt_t  d_iwarns;       /* warnings issued wrt num inodes */
+/*16*/ xfs_qwarncnt_t  d_bwarns;       /* warnings issued wrt disk blocks */
+/*32*/ __int32_t       d_pad0;         /* 64 bit align */
+/*64*/ xfs_qcnt_t      d_rtb_hardlimit;/* absolute limit on realtime blks */
+/*64*/ xfs_qcnt_t      d_rtb_softlimit;/* preferred limit on RT disk blks */
+/*64*/ xfs_qcnt_t      d_rtbcount;     /* realtime blocks owned */
+/*32*/ __int32_t       d_rtbtimer;     /* similar to above; for RT disk blocks */
+/*16*/ xfs_qwarncnt_t  d_rtbwarns;     /* warnings issued wrt RT disk blocks */
+/*16*/ __uint16_t      d_pad;          /* fills out the last 64 bits, per the widths at left */
+} xfs_disk_dquot_t;
+
+/*
+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
+ * carrying the unnecessary padding would be a waste of memory.
+ */
+typedef struct xfs_dqblk {
+       xfs_disk_dquot_t  dd_diskdq;    /* portion that lives incore as well */
+       char              dd_fill[32];  /* filling for posterity */
+} xfs_dqblk_t;
+
+/*
+ * flags for q_flags field in the dquot.
+ */
+#define XFS_DQ_USER            0x0001          /* a user quota */
+#define XFS_DQ_PROJ            0x0002          /* a project quota */
+
+#define XFS_DQ_FLOCKED         0x0008          /* flush lock taken */
+#define XFS_DQ_DIRTY           0x0010          /* dquot is dirty */
+#define XFS_DQ_WANT            0x0020          /* for lookup/reclaim race */
+#define XFS_DQ_INACTIVE                0x0040          /* dq off mplist & hashlist */
+#define XFS_DQ_MARKER          0x0080          /* sentinel */
+
+/*
+ * In the worst case, when both user and proj quotas are on,
+ * we can have a max of three dquots changing in a single transaction.
+ */
+#define XFS_DQUOT_LOGRES(mp)   (sizeof(xfs_disk_dquot_t) * 3)
+
+#endif /* __XFS_DQBLK_H__ */
diff --git a/include/xfs_dquot_item.h b/include/xfs_dquot_item.h
new file mode 100644 (file)
index 0000000..f8d9049
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+/*
+ * These are the structures used to lay out dquots and quotaoff
+ * records on the log. Quite similar to those of inodes.
+ */
+
+/*
+ * log format struct for dquots.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ */
+typedef struct xfs_dq_logformat {
+       __uint16_t              qlf_type;      /* dquot log item type */
+       __uint16_t              qlf_size;      /* size of this item */
+       xfs_dqid_t              qlf_id;        /* usr/proj id number : 32 bits */
+       __int64_t               qlf_blkno;     /* blkno of dquot buffer */
+       __int32_t               qlf_len;       /* len of dquot buffer */
+       __uint32_t              qlf_boffset;   /* off of dquot in buffer */
+} xfs_dq_logformat_t;
+
+/*
+ * log format struct for QUOTAOFF records.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
+ * to the first and ensures that the first logitem is taken out of the AIL
+ * only when the last one is securely committed.
+ */    
+typedef struct xfs_qoff_logformat {
+       unsigned short          qf_type;        /* quotaoff log item type */
+       unsigned short          qf_size;        /* size of this item */
+       unsigned int            qf_flags;       /* USR and/or PRJ */
+       char                    qf_pad[12];     /* padding for future */
+} xfs_qoff_logformat_t;
+
+
+#ifdef __KERNEL__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+typedef struct xfs_dq_logitem {
+       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
+       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
+       unsigned short           qli_pushbuf_flag; /* one bit used in push_ail */
+#ifdef DEBUG
+       uint64_t                 qli_push_owner;
+#endif
+       xfs_dq_logformat_t       qli_format;       /* logged structure */
+} xfs_dq_logitem_t;
+
+
+typedef struct xfs_qoff_logitem {
+       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+       xfs_qoff_logformat_t     qql_format;    /* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, 
+                                                   xfs_qoff_logitem_t *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, 
+                                                  xfs_qoff_logitem_t *, uint);
+extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
+                                                      xfs_qoff_logitem_t *);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/include/xfs_extfree_item.h b/include/xfs_extfree_item.h
new file mode 100644 (file)
index 0000000..640f8e2
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_EXTFREE_ITEM_H__
+#define        __XFS_EXTFREE_ITEM_H__
+
+struct xfs_mount;
+struct xfs_zone;
+
+typedef struct xfs_extent {
+       xfs_dfsbno_t    ext_start;
+       xfs_extlen_t    ext_len;
+} xfs_extent_t;
+
+/*
+ * This is the structure used to lay out an efi log item in the
+ * log.  The efi_extents field is a variable size array whose
+ * size is given by efi_nextents.
+ */
+typedef struct xfs_efi_log_format {
+       unsigned short          efi_type;       /* efi log item type */
+       unsigned short          efi_size;       /* size of this item */
+       uint                    efi_nextents;   /* # extents to free */
+       __uint64_t              efi_id;         /* efi identifier */
+       xfs_extent_t            efi_extents[1]; /* array of extents to free */
+} xfs_efi_log_format_t;
+
+/*
+ * This is the structure used to lay out an efd log item in the
+ * log.  The efd_extents array is a variable size array whose
+ * size is given by efd_nextents.
+ */
+typedef struct xfs_efd_log_format {
+       unsigned short          efd_type;       /* efd log item type */
+       unsigned short          efd_size;       /* size of this item */
+       uint                    efd_nextents;   /* # of extents freed */
+       __uint64_t              efd_efi_id;     /* id of corresponding efi */
+       xfs_extent_t            efd_extents[1]; /* array of extents freed */
+} xfs_efd_log_format_t;
+
+
+#ifdef __KERNEL__
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define        XFS_EFI_MAX_FAST_EXTENTS        16
+
+/*
+ * Define EFI flags.
+ */
+#define        XFS_EFI_RECOVERED       0x1
+#define        XFS_EFI_COMMITTED       0x2
+#define        XFS_EFI_CANCELED        0x4
+
+/*
+ * This is the "extent free intention" log item.  It is used
+ * to log the fact that some extents need to be freed.  It is
+ * used in conjunction with the "extent free done" log item
+ * described below.
+ */
+typedef struct xfs_efi_log_item {
+       xfs_log_item_t          efi_item;
+       uint                    efi_flags;      /* misc flags */
+       uint                    efi_next_extent;
+       xfs_efi_log_format_t    efi_format;
+} xfs_efi_log_item_t;
+
+/*
+ * This is the "extent free done" log item.  It is used to log
+ * the fact that some extents earlier mentioned in an efi item
+ * have been freed.
+ */
+typedef struct xfs_efd_log_item {
+       xfs_log_item_t          efd_item;
+       xfs_efi_log_item_t      *efd_efip;
+       uint                    efd_next_extent;
+       xfs_efd_log_format_t    efd_format;
+} xfs_efd_log_item_t;
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define        XFS_EFD_MAX_FAST_EXTENTS        16
+
+extern struct xfs_zone *xfs_efi_zone;
+extern struct xfs_zone *xfs_efd_zone;
+
+xfs_efi_log_item_t     *xfs_efi_init(struct xfs_mount *, uint);
+xfs_efd_log_item_t     *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
+                                     uint);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_EXTFREE_ITEM_H__ */
diff --git a/include/xfs_fs.h b/include/xfs_fs.h
new file mode 100644 (file)
index 0000000..81a8c3c
--- /dev/null
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _LINUX_XFS_FS_H
+#define _LINUX_XFS_FS_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+
+/*
+ * SGI's XFS filesystem's major stuff (constants, structures)
+ */
+
+#define XFS_SUPER_MAGIC 0x58465342
+#define XFS_NAME       "xfs"
+
+struct biosize {
+       __u32           biosz_flags;
+       __s32           biosz_read;
+       __s32           biosz_write;
+       __s32           dfl_biosz_read;
+       __s32           dfl_biosz_write;
+};
+       
+/* 
+ * direct I/O attribute record used with F_DIOINFO
+ * d_miniosz is the min xfer size, xfer size multiple and file seek offset
+ * alignment.
+ */
+struct dioattr {
+       __u32           d_mem;          /* data buffer memory alignment */
+       __u32           d_miniosz;      /* min xfer size                */
+       __u32           d_maxiosz;      /* max xfer size                */
+};
+
+/*
+ * Structure for F_FSGETXATTR[A] and F_FSSETXATTR.
+ */
+struct fsxattr {
+       __u32           fsx_xflags;     /* xflags field value (get/set) */
+       __u32           fsx_extsize;    /* extsize field value (get/set)*/
+       __u32           fsx_nextents;   /* nextents field value (get)   */
+       unsigned char   fsx_pad[16];
+};
+
+/*
+ * Flags for the bs_xflags/fsx_xflags field
+ * There should be a one-to-one correspondence between these flags and the
+ * XFS_DIFLAG_s.
+ */
+#define        XFS_XFLAG_REALTIME      0x00000001
+#define        XFS_XFLAG_PREALLOC      0x00000002
+#define        XFS_XFLAG_HASATTR       0x80000000      /* no DIFLAG for this   */
+#define        XFS_XFLAG_ALL           \
+       ( XFS_XFLAG_REALTIME|XFS_XFLAG_PREALLOC|XFS_XFLAG_HASATTR )
+
+
+/*
+ * Structure for F_GETBMAP.
+ * On input, fill in bmv_offset and bmv_length of the first structure
+ * to indicate the area of interest in the file, and bmv_entry with the
+ * number of array elements given.  The first structure is updated on
+ * return to give the offset and length for the next call.
+ */
+struct getbmap {
+       __s64           bmv_offset;     /* file offset of segment in blocks */
+       __s64           bmv_block;      /* starting block (64-bit daddr_t)  */
+       __s64           bmv_length;     /* length of segment, blocks        */
+       __s32           bmv_count;      /* # of entries in array incl. 1st  */
+       __s32           bmv_entries;    /* # of entries filled in (output)  */
+};
+
+/*
+ *     Structure for F_GETBMAPX.  The fields bmv_offset through bmv_entries
+ *     are used exactly as in the getbmap structure.  The getbmapx structure
+ *     has additional bmv_iflags and bmv_oflags fields. The bmv_iflags field
+ *     is only used for the first structure.  It contains input flags 
+ *     specifying F_GETBMAPX actions.  The bmv_oflags field is filled in
+ *     by the F_GETBMAPX command for each returned structure after the first.
+ */
+struct getbmapx {
+       __s64           bmv_offset;     /* file offset of segment in blocks */
+       __s64           bmv_block;      /* starting block (64-bit daddr_t)  */
+       __s64           bmv_length;     /* length of segment, blocks        */
+       __s32           bmv_count;      /* # of entries in array incl. 1st  */
+       __s32           bmv_entries;    /* # of entries filled in (output). */
+       __s32           bmv_iflags;     /* input flags (1st structure)      */
+       __s32           bmv_oflags;     /* output flags (after 1st structure)*/
+       __s32           bmv_unused1;    /* future use                       */
+       __s32           bmv_unused2;    /* future use                       */
+};
+
+/*     bmv_iflags values - set by F_GETBMAPX caller.   */
+
+#define        BMV_IF_ATTRFORK         0x1     /* return attr fork rather than data */
+#define BMV_IF_NO_DMAPI_READ   0x2     /* Do not generate DMAPI read event  */
+#define BMV_IF_PREALLOC                0x4     /* rtn status BMV_OF_PREALLOC if req */
+
+#define BMV_IF_VALID   (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC)
+
+/*     bmv_oflags values - returned from F_GETBMAPX for each non-header segment */
+
+#define BMV_OF_PREALLOC                0x1     /* segment = unwritten pre-allocation */
+
+/*     Convert getbmap <-> getbmapx - copy the five shared fields from p1 to p2.
+ *     p1 and p2 are struct lvalues (e.g. bm or *bmp), each evaluated five times. */
+#define        GETBMAP_CONVERT(p1,p2) {        \
+       (p2).bmv_offset = (p1).bmv_offset;      \
+       (p2).bmv_block = (p1).bmv_block;        \
+       (p2).bmv_length = (p1).bmv_length;      \
+       (p2).bmv_count = (p1).bmv_count;        \
+       (p2).bmv_entries = (p1).bmv_entries;  }
+
+#ifdef __KERNEL__
+
+/*     Kernel only bmv_iflags value.   */
+#define        BMV_IF_EXTENDED 0x40000000      /* getbmapx if set */
+
+#endif /* __KERNEL__ */
+
+/*
+ * Structure for F_FSSETDM.
+ * For use by backup and restore programs to set the XFS on-disk inode
+ * fields di_dmevmask and di_dmstate.  These must be set to exactly and
+ * only values previously obtained via xfs_bulkstat!  (Specifically the
+ * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ */
+struct fsdmidata {
+       __s32           fsd_dmevmask;   /* corresponds to di_dmevmask */
+       __u16           fsd_padding;
+       __u16           fsd_dmstate;    /* corresponds to di_dmstate  */
+};
+
+/*
+ * File segment locking set data type for 64 bit access.
+ * Also used for all the RESV/FREE interfaces.
+ */
+typedef struct xfs_flock64 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start;
+       __s64           l_len;          /* len == 0 means until end of file */
+        __s32          l_sysid;
+        pid_t          l_pid;
+       __s32           l_pad[4];       /* reserve area                     */
+} xfs_flock64_t;
+
+/*
+ * Output for XFS_IOC_FSGEOMETRY
+ */
+typedef struct xfs_fsop_geom {
+       __u32           blocksize;      /* filesystem (data) block size */
+       __u32           rtextsize;      /* realtime extent size         */
+       __u32           agblocks;       /* fsblocks in an AG            */
+       __u32           agcount;        /* number of allocation groups  */
+       __u32           logblocks;      /* fsblocks in the log          */
+       __u32           sectsize;       /* (data) sector size, bytes    */
+       __u32           inodesize;      /* inode size in bytes          */
+       __u32           imaxpct;        /* max allowed inode space(%)   */
+       __u64           datablocks;     /* fsblocks in data subvolume   */
+       __u64           rtblocks;       /* fsblocks in realtime subvol  */
+       __u64           rtextents;      /* rt extents in realtime subvol*/
+       __u64           logstart;       /* starting fsblock of the log  */
+       unsigned char   uuid[16];       /* unique id of the filesystem  */
+       __u32           sunit;          /* stripe unit, fsblocks        */
+       __u32           swidth;         /* stripe width, fsblocks       */
+       __s32           version;        /* structure version            */
+       __u32           flags;          /* superblock version flags     */
+       __u32           logsectsize;    /* log sector size, bytes       */
+       __u32           rtsectsize;     /* realtime sector size, bytes  */
+       __u32           dirblocksize;   /* directory block size, bytes  */
+} xfs_fsop_geom_t;
+
+/* Output for XFS_IOC_FSCOUNTS */
+typedef struct xfs_fsop_counts {
+       __u64   freedata;       /* free data section blocks */
+       __u64   freertx;        /* free rt extents */
+       __u64   freeino;        /* free inodes */
+       __u64   allocino;       /* total allocated inodes */
+} xfs_fsop_counts_t;
+
+/* Input/Output for XFS_IOC_GET_RESBLKS and XFS_IOC_SET_RESBLKS */
+typedef struct xfs_fsop_resblks {
+       __u64  resblks;
+       __u64  resblks_avail;
+} xfs_fsop_resblks_t;
+
+#define        XFS_FSOP_GEOM_VERSION   0
+
+#define        XFS_FSOP_GEOM_FLAGS_ATTR        0x01    /* attributes in use    */
+#define        XFS_FSOP_GEOM_FLAGS_NLINK       0x02    /* 32-bit nlink values  */
+#define        XFS_FSOP_GEOM_FLAGS_QUOTA       0x04    /* quotas enabled       */
+#define        XFS_FSOP_GEOM_FLAGS_IALIGN      0x08    /* inode alignment      */
+#define        XFS_FSOP_GEOM_FLAGS_DALIGN      0x10    /* large data alignment */
+#define        XFS_FSOP_GEOM_FLAGS_SHARED      0x20    /* read-only shared     */
+#define        XFS_FSOP_GEOM_FLAGS_EXTFLG      0x40    /* special extent flag  */
+#define        XFS_FSOP_GEOM_FLAGS_DIRV2       0x80    /* directory version 2  */
+
+
+/*
+ * Minimum and maximum sizes needed for growth checks
+ */
+#define        XFS_MIN_AG_BLOCKS       64
+#define        XFS_MIN_LOG_BLOCKS      512
+#define        XFS_MAX_LOG_BLOCKS      (64 * 1024)
+#define        XFS_MIN_LOG_BYTES       (256 * 1024)
+#define        XFS_MAX_LOG_BYTES       (128 * 1024 * 1024)
+
+/*
+ * XFS_IOC_FSGROWFSDATA
+ */
+typedef struct xfs_growfs_data {
+       __u64           newblocks;      /* new data subvol size, fsblocks */
+       __u32           imaxpct;        /* new inode space percentage limit */
+} xfs_growfs_data_t;
+
+/*
+ * XFS_IOC_FSGROWFSLOG
+ */
+typedef struct xfs_growfs_log {
+       __u32           newblocks;      /* new log size, fsblocks */
+       __u32           isint;          /* 1 if new log is internal */
+} xfs_growfs_log_t;
+
+/*
+ * XFS_IOC_FSGROWFSRT
+ */
+typedef struct xfs_growfs_rt {
+       __u64           newblocks;      /* new realtime size, fsblocks */
+       __u32           extsize;        /* new realtime extent size, fsblocks */
+} xfs_growfs_rt_t;
+
+
+/*
+ * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE
+ */
+typedef struct xfs_bstime {
+       time_t          tv_sec;         /* seconds              */
+       __s32           tv_nsec;        /* and nanoseconds      */
+} xfs_bstime_t;
+
+typedef struct xfs_bstat {
+       __u64           bs_ino;         /* inode number                 */
+       __u16           bs_mode;        /* type and mode                */
+       __u16           bs_nlink;       /* number of links              */
+       __u32           bs_uid;         /* user id                      */
+       __u32           bs_gid;         /* group id                     */
+       __u32           bs_rdev;        /* device value                 */
+       __s32           bs_blksize;     /* block size                   */
+       __s64           bs_size;        /* file size                    */
+       xfs_bstime_t    bs_atime;       /* access time                  */
+       xfs_bstime_t    bs_mtime;       /* modify time                  */
+       xfs_bstime_t    bs_ctime;       /* inode change time            */
+       int64_t         bs_blocks;      /* number of blocks             */
+       __u32           bs_xflags;      /* extended flags               */
+       __s32           bs_extsize;     /* extent size                  */
+       __s32           bs_extents;     /* number of extents            */
+       __u32           bs_gen;         /* generation count             */
+       __u16           bs_projid;      /* project id                   */
+       unsigned char   bs_pad[14];     /* pad space, unused            */
+       __u32           bs_dmevmask;    /* DMIG event mask              */
+       __u16           bs_dmstate;     /* DMIG state info              */
+       __u16           bs_aextents;    /* attribute number of extents  */
+} xfs_bstat_t;
+
+/*
+ * The user-level BulkStat Request interface structure.
+ */
+typedef struct xfs_fsop_bulkreq {
+       __u64           *lastip;        /* last inode # pointer         */
+       __s32           icount;         /* count of entries in buffer   */
+       void            *ubuffer;       /* user buffer for inode desc.  */
+       __s32           *ocount;        /* output count pointer         */
+} xfs_fsop_bulkreq_t;
+
+
+/*
+ * Structures returned from xfs_inumbers syssgi routine.
+ */
+typedef struct xfs_inogrp {
+       __u64           xi_startino;    /* starting inode number        */
+       __s32           xi_alloccount;  /* # bits set in allocmask      */
+       __u64           xi_allocmask;   /* mask of allocated inodes     */
+} xfs_inogrp_t;
+
+
+/*
+ * The user-level Handle Request interface structure.
+ */
+typedef struct xfs_fsop_handlereq {
+       __u32           fd;             /* fd for FD_TO_HANDLE          */
+       void            *path;          /* user pathname                */
+       __u32           oflags;         /* open flags                   */
+       void            *ihandle;       /* user supplied handle         */
+       __u32           ihandlen;       /* user supplied length         */
+       void            *ohandle;       /* user buffer for handle       */
+       __u32           *ohandlen;      /* user buffer length           */
+} xfs_fsop_handlereq_t;
+
+#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
+/*
+ * Error injection.
+ */
+typedef struct xfs_error_injection {
+       __s32           fd;
+       __s32           errtag;
+} xfs_error_injection_t;
+#endif /* DEBUG || INDUCE_IO_ERROR */
+
+/*
+ * File system identifier. Should be unique (at least per machine).
+ */
+typedef struct {
+       __u32 val[2];                   /* file system id type */
+} xfs_fsid_t;
+
+/*
+ * File identifier.  Should be unique per filesystem on a single machine.
+ * This is typically called by a stateless file server in order to generate
+ * "file handles".
+ */
+#define MAXFIDSZ        46
+
+typedef struct fid {
+       __u16           fid_len;                /* length of data in bytes */
+       unsigned char   fid_data[MAXFIDSZ];     /* data (variable length)  */
+} fid_t;
+
+typedef struct xfs_fid {
+       __u16   xfs_fid_len;            /* length of remainder  */
+       __u16   xfs_fid_pad;
+       __u32   xfs_fid_gen;            /* generation number    */
+       __u64   xfs_fid_ino;            /* 64 bits inode number */
+} xfs_fid_t;
+
+typedef struct xfs_fid2 {
+       __u16   fid_len;        /* length of remainder */
+       __u16   fid_pad;        /* padding, must be zero */
+       __u32   fid_gen;        /* generation number */
+       __u64   fid_ino;        /* inode number */
+} xfs_fid2_t;
+
+typedef struct xfs_handle {
+       union {
+               __s64       align;      /* force alignment of ha_fid     */
+               xfs_fsid_t  _ha_fsid;   /* unique file system identifier */
+       } ha_u;
+       xfs_fid_t       ha_fid;         /* file system specific file ID  */
+} xfs_handle_t;
+
+#define ha_fsid ha_u._ha_fsid
+
+#define        XFS_HSIZE(handle)       (((char *) &(handle).ha_fid.xfs_fid_pad  \
+                                - (char *) &(handle))                    \
+                                + (handle).ha_fid.xfs_fid_len)
+
+#define XFS_HANDLE_CMP(h1, h2) bcmp(h1, h2, sizeof (xfs_handle_t))
+
+#define FSHSIZE                sizeof (fsid_t)
+
+
+/*
+ * ioctl commands that replace IRIX fcntl()'s
+ * For 'documentation' purposes more than anything else,
+ * the "cmd #" field reflects the IRIX fcntl number.
+ */
+#define        XFS_IOC_ALLOCSP         _IOW ('X', 10, struct xfs_flock64)
+#define        XFS_IOC_FREESP          _IOW ('X', 11, struct xfs_flock64)
+#define        XFS_IOC_DIOINFO         _IOR ('X', 30, struct dioattr)
+#define        XFS_IOC_FSGETXATTR      _IOR ('X', 31, struct fsxattr)
+#define        XFS_IOC_FSSETXATTR      _IOW ('X', 32, struct fsxattr)
+#define        XFS_IOC_ALLOCSP64       _IOW ('X', 36, struct xfs_flock64)
+#define        XFS_IOC_FREESP64        _IOW ('X', 37, struct xfs_flock64)
+#define        XFS_IOC_GETBMAP         _IOWR('X', 38, struct getbmap)
+#define        XFS_IOC_FSSETDM         _IOW ('X', 39, struct fsdmidata)
+#define        XFS_IOC_RESVSP          _IOW ('X', 40, struct xfs_flock64)
+#define        XFS_IOC_UNRESVSP        _IOW ('X', 41, struct xfs_flock64)
+#define        XFS_IOC_RESVSP64        _IOW ('X', 42, struct xfs_flock64)
+#define        XFS_IOC_UNRESVSP64      _IOW ('X', 43, struct xfs_flock64)
+#define        XFS_IOC_GETBMAPA        _IOWR('X', 44, struct getbmap)
+#define        XFS_IOC_FSGETXATTRA     _IOR ('X', 45, struct fsxattr)
+#define        XFS_IOC_SETBIOSIZE      _IOW ('X', 46, struct biosize)
+#define        XFS_IOC_GETBIOSIZE      _IOR ('X', 47, struct biosize)
+#define        XFS_IOC_GETBMAPX        _IOWR('X', 56, struct getbmap) /* NOTE(review): sized by getbmap, not getbmapx - confirm intended */
+
+/*
+ * ioctl commands that replace IRIX syssgi()'s
+ */
+#define        XFS_IOC_FSGEOMETRY           _IOR ('X', 100, struct xfs_fsop_geom)
+#define        XFS_IOC_FSBULKSTAT           _IOWR('X', 101, struct xfs_fsop_bulkreq)
+#define        XFS_IOC_FSBULKSTAT_SINGLE    _IOWR('X', 102, struct xfs_fsop_bulkreq)
+#define        XFS_IOC_FSINUMBERS           _IOWR('X', 103, struct xfs_fsop_bulkreq)
+#define        XFS_IOC_PATH_TO_FSHANDLE     _IOWR('X', 104, struct xfs_fsop_handlereq)
+#define        XFS_IOC_PATH_TO_HANDLE       _IOWR('X', 105, struct xfs_fsop_handlereq)
+#define        XFS_IOC_FD_TO_HANDLE         _IOWR('X', 106, struct xfs_fsop_handlereq)
+#define        XFS_IOC_OPEN_BY_HANDLE       _IOWR('X', 107, struct xfs_fsop_handlereq)
+#define        XFS_IOC_READLINK_BY_HANDLE   _IOWR('X', 108, struct xfs_fsop_handlereq)
+#define XFS_IOC_SWAPEXT                     _IOWR('X', 109, struct xfs_swapext) /* NOTE(review): struct xfs_swapext is not declared in this header */
+#define        XFS_IOC_FSGROWFSDATA         _IOW('X', 110, struct xfs_growfs_data)
+#define        XFS_IOC_FSGROWFSLOG          _IOW('X', 111, struct xfs_growfs_log)
+#define        XFS_IOC_FSGROWFSRT           _IOW('X', 112, struct xfs_growfs_rt)
+#define        XFS_IOC_FSCOUNTS             _IOR ('X', 113, struct xfs_fsop_counts)
+#define        XFS_IOC_SET_RESBLKS          _IOR ('X', 114, struct xfs_fsop_resblks) /* NOTE(review): _IOR, yet the struct is documented as input/output - confirm */
+#define        XFS_IOC_GET_RESBLKS          _IOR ('X', 115, struct xfs_fsop_resblks)
+#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
+#define XFS_IOC_ERROR_INJECTION  _IOW('X', 116, struct xfs_error_injection)
+#define XFS_IOC_ERROR_CLEARALL   _IOW('X', 117, struct xfs_error_injection)
+#endif /* DEBUG || INDUCE_IO_ERROR */
+
+/*
+ * ioctl command to export information not in standard interfaces
+ *     140: IRIX statvfs.f_fstr field - UUID from the superblock
+ */
+#define XFS_IOC_GETFSUUID      _IOR ('X', 140, unsigned char[16])
+
+
+/*
+ * Block I/O parameterization.  A basic block (BB) is the lowest size of
+ * filesystem allocation, and must == NBPSCTR.  Length units given to bio
+ * routines are in BB's.
+ */
+#define        BBSHIFT         9
+#define        BBSIZE          (1<<BBSHIFT)
+#define        BBMASK          (BBSIZE-1)
+#define        BTOBB(bytes)    (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
+#define        BTOBBT(bytes)   ((__u64)(bytes) >> BBSHIFT)
+#define        BBTOB(bbs)      ((bbs) << BBSHIFT)
+#define OFFTOBB(bytes) (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT)
+#define        OFFTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT)
+#define        BBTOOFF(bbs)    ((__u64)(bbs) << BBSHIFT)     
+     
+#define SEEKLIMIT32    0x7fffffff
+#define BBSEEKLIMIT32  BTOBBT(SEEKLIMIT32)
+#define SEEKLIMIT      0x7fffffffffffffffLL
+#define BBSEEKLIMIT    OFFTOBBT(SEEKLIMIT)     
+
+
+#ifdef __KERNEL__
+
+/*
+ * Function prototypes
+ */
+extern int init_xfs_fs(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_XFS_FS_H */
diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h
new file mode 100644 (file)
index 0000000..2693501
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IALLOC_H__
+#define        __XFS_IALLOC_H__
+
+struct xfs_buf;
+struct xfs_dinode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Allocation parameters for inode allocation.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_INODES)
+int xfs_ialloc_inodes(struct xfs_mount *mp);
+#define        XFS_IALLOC_INODES(mp)   xfs_ialloc_inodes(mp)
+#else
+#define        XFS_IALLOC_INODES(mp)   ((mp)->m_ialloc_inos)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_BLOCKS)
+xfs_extlen_t xfs_ialloc_blocks(struct xfs_mount *mp);
+#define        XFS_IALLOC_BLOCKS(mp)   xfs_ialloc_blocks(mp)
+#else
+#define        XFS_IALLOC_BLOCKS(mp)   ((mp)->m_ialloc_blks)
+#endif
+
+/*
+ * For small block file systems, move inodes in clusters of this size.
+ * When we don't have a lot of memory, however, we go a bit smaller
+ * to reduce the number of AGI and ialloc btree blocks we need to keep
+ * around for xfs_dilocate().  We choose which one to use in
+ * xfs_mount_int().
+ */
+#define        XFS_INODE_BIG_CLUSTER_SIZE      8192
+#define        XFS_INODE_SMALL_CLUSTER_SIZE    4096
+#define        XFS_INODE_CLUSTER_SIZE(mp)      ((mp)->m_inode_cluster_size)
+
+/*
+ * Make an inode pointer out of the buffer/offset.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MAKE_IPTR)
+struct xfs_dinode *xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o);
+#define        XFS_MAKE_IPTR(mp,b,o)           xfs_make_iptr(mp,b,o)
+#else
+#define        XFS_MAKE_IPTR(mp,b,o) \
+       ((xfs_dinode_t *)(xfs_buf_offset(b, (o) << (mp)->m_sb.sb_inodelog)))
+#endif
+
+/*
+ * Find a free (set) bit in the inode bitmask.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_FIND_FREE)
+int xfs_ialloc_find_free(xfs_inofree_t *fp);
+#define        XFS_IALLOC_FIND_FREE(fp)        xfs_ialloc_find_free(fp)
+#else
+#define        XFS_IALLOC_FIND_FREE(fp)        xfs_lowbit64(*(fp))
+#endif
+
+
+#ifdef __KERNEL__
+
+/*
+ * Prototypes for visible xfs_ialloc.c routines.
+ */
+
+/*
+ * Allocate an inode on disk.
+ * Mode is used to tell whether the new inode will need space, and whether
+ * it is a directory.
+ *
+ * To work within the constraint of one allocation per transaction,
+ * xfs_dialloc() is designed to be called twice if it has to do an
+ * allocation to make more free inodes.  If an inode is 
+ * available without an allocation, agbp would be set to the current
+ * agbp and alloc_done set to false.
+ * If an allocation needed to be done, agbp would be set to the
+ * inode header of the allocation group and alloc_done set to true.
+ * The caller should then commit the current transaction and allocate a new
+ * transaction.  xfs_dialloc() should then be called again with
+ * the agbp value returned from the previous call.
+ *
+ * Once we successfully pick an inode its number is returned and the
+ * on-disk data structures are updated.  The inode itself is not read
+ * in, since doing so would break ordering constraints with xfs_reclaim.
+ *
+ * *agbp should be set to NULL on the first call, *alloc_done set to FALSE.
+ */
+int                                    /* error */
+xfs_dialloc(
+       struct xfs_trans *tp,           /* transaction pointer */
+       xfs_ino_t       parent,         /* parent inode (directory) */
+       mode_t          mode,           /* mode bits for new inode */
+       int             okalloc,        /* ok to allocate more space */
+       struct xfs_buf  **agbp,         /* buf for a.g. inode header */
+       boolean_t       *alloc_done,    /* an allocation was done to replenish
+                                          the free inodes */
+       xfs_ino_t       *inop);         /* inode number allocated */
+
+/*
+ * Free disk inode.  Carefully avoids touching the incore inode, all
+ * manipulations incore are the caller's responsibility.
+ * The on-disk inode is not changed by this operation, only the
+ * btree (free inode mask) is changed.
+ */
+int                                    /* error */
+xfs_difree(
+       struct xfs_trans *tp,           /* transaction pointer */
+       xfs_ino_t       inode);         /* inode to be freed */
+
+/*
+ * Return the location of the inode in bno/len/off,
+ * for mapping it into a buffer.
+ */
+int
+xfs_dilocate(
+       struct xfs_mount *mp,           /* file system mount structure */
+       struct xfs_trans *tp,           /* transaction pointer */
+       xfs_ino_t       ino,            /* inode to locate */
+       xfs_fsblock_t   *bno,           /* output: block containing inode */
+       int             *len,           /* output: num blocks in cluster*/
+       int             *off,           /* output: index in block of inode */
+       uint            flags);         /* flags for inode btree lookup */
+
+/*
+ * Compute and fill in value of m_in_maxlevels.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+       struct xfs_mount *mp);          /* file system mount structure */
+
+/*
+ * Log specified fields for the ag hdr (inode section)
+ */
+void
+xfs_ialloc_log_agi(
+       struct xfs_trans *tp,           /* transaction pointer */
+       struct xfs_buf  *bp,            /* allocation group header buffer */
+       int             fields);        /* bitmask of fields to log */
+
+/*
+ * Read in the allocation group header (inode allocation section)
+ */
+int                                    /* error */
+xfs_ialloc_read_agi(
+       struct xfs_mount *mp,           /* file system mount structure */
+       struct xfs_trans *tp,           /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       struct xfs_buf  **bpp);         /* allocation group hdr buf */
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_IALLOC_H__ */
diff --git a/include/xfs_ialloc_btree.h b/include/xfs_ialloc_btree.h
new file mode 100644 (file)
index 0000000..e49b259
--- /dev/null
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IALLOC_BTREE_H__
+#define        __XFS_IALLOC_BTREE_H__
+
+/*
+ * Inode map on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_btree_sblock;
+struct xfs_mount;
+
+/*
+ * There is a btree for the inode map per allocation group.
+ */
+#define        XFS_IBT_MAGIC   0x49414254      /* 'IABT' */
+
+typedef        __uint64_t      xfs_inofree_t;
+#define        XFS_INODES_PER_CHUNK    (NBBY * sizeof(xfs_inofree_t))
+#define        XFS_INODES_PER_CHUNK_LOG        (XFS_NBBYLOG + 3)
+#define        XFS_INOBT_ALL_FREE      ((xfs_inofree_t)-1)
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASKN)
+xfs_inofree_t xfs_inobt_maskn(int i, int n);
+#define        XFS_INOBT_MASKN(i,n)            xfs_inobt_maskn(i,n)
+#else
+#define        XFS_INOBT_MASKN(i,n)    \
+       ((((n) >= XFS_INODES_PER_CHUNK ? \
+               (xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i))
+#endif
+
+/*
+ * Data record structure
+ */
+typedef struct xfs_inobt_rec
+{
+       xfs_agino_t     ir_startino;    /* starting inode number */
+       __int32_t       ir_freecount;   /* count of free inodes (set bits) */
+       xfs_inofree_t   ir_free;        /* free inode mask */
+} xfs_inobt_rec_t;
+
+/*
+ * Key structure
+ */
+typedef struct xfs_inobt_key
+{
+       xfs_agino_t     ir_startino;    /* starting inode number */
+} xfs_inobt_key_t;
+
+typedef xfs_agblock_t xfs_inobt_ptr_t; /* btree pointer type */
+                                       /* btree block header type */
+typedef        struct xfs_btree_sblock xfs_inobt_block_t;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_INOBT_BLOCK)
+xfs_inobt_block_t *xfs_buf_to_inobt_block(struct xfs_buf *bp);
+#define        XFS_BUF_TO_INOBT_BLOCK(bp)      xfs_buf_to_inobt_block(bp)
+#else
+#define        XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)(XFS_BUF_PTR(bp)))
+#endif
+
+/*
+ * Bit manipulations for ir_free.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASK)
+xfs_inofree_t xfs_inobt_mask(int i);
+#define        XFS_INOBT_MASK(i)               xfs_inobt_mask(i)
+#else
+#define        XFS_INOBT_MASK(i)               ((xfs_inofree_t)1 << (i))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE)
+int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define        XFS_INOBT_IS_FREE(rp,i,arch)    xfs_inobt_is_free(rp,i,arch)
+#else
+#define        XFS_INOBT_IS_FREE(rp,i,arch)    ((INT_GET((rp)->ir_free, arch) \
+                                         & XFS_INOBT_MASK(i)) != 0)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE)
+void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define        XFS_INOBT_SET_FREE(rp,i,arch)   xfs_inobt_set_free(rp,i,arch)
+#else
+#define        XFS_INOBT_SET_FREE(rp,i,arch)   (INT_MOD_EXPR((rp)->ir_free, arch, |= XFS_INOBT_MASK(i)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_CLR_FREE)
+void xfs_inobt_clr_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch);
+#define        XFS_INOBT_CLR_FREE(rp,i,arch)   xfs_inobt_clr_free(rp,i,arch)
+#else
+#define        XFS_INOBT_CLR_FREE(rp,i,arch)   (INT_MOD_EXPR((rp)->ir_free, arch, &= ~XFS_INOBT_MASK(i)))
+#endif
+
+/*
+ * Real block structures have a size equal to the disk block size.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_SIZE)
+int xfs_inobt_block_size(int lev, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_BLOCK_SIZE(lev,cur)   xfs_inobt_block_size(lev,cur)
+#else
+#define        XFS_INOBT_BLOCK_SIZE(lev,cur)   (1 << (cur)->bc_blocklog)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MAXRECS)
+int xfs_inobt_block_maxrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_BLOCK_MAXRECS(lev,cur)        xfs_inobt_block_maxrecs(lev,cur)
+#else  /* inline form: m_inobt_mxr[0] when lev == 0, m_inobt_mxr[1] otherwise */
+#define        XFS_INOBT_BLOCK_MAXRECS(lev,cur)        \
+       ((cur)->bc_mp->m_inobt_mxr[(lev) != 0])
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MINRECS)
+int xfs_inobt_block_minrecs(int lev, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_BLOCK_MINRECS(lev,cur)        xfs_inobt_block_minrecs(lev,cur)
+#else  /* inline form: m_inobt_mnr[0] when lev == 0, m_inobt_mnr[1] otherwise */
+#define        XFS_INOBT_BLOCK_MINRECS(lev,cur)        \
+       ((cur)->bc_mp->m_inobt_mnr[(lev) != 0])
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_LAST_REC)
+int xfs_inobt_is_last_rec(struct xfs_btree_cur *cur);
+#define        XFS_INOBT_IS_LAST_REC(cur)      xfs_inobt_is_last_rec(cur)
+#else
+#define        XFS_INOBT_IS_LAST_REC(cur)      \
+       ((cur)->bc_ptrs[0] == \
+               INT_GET(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs, ARCH_CONVERT))
+#endif
+
+/*
+ * Maximum number of inode btree levels.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IN_MAXLEVELS)
+int xfs_in_maxlevels(struct xfs_mount *mp);
+#define        XFS_IN_MAXLEVELS(mp)            xfs_in_maxlevels(mp)
+#else
+#define        XFS_IN_MAXLEVELS(mp)            ((mp)->m_in_maxlevels)
+#endif
+
+/*
+ * block numbers in the AG.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IBT_BLOCK)
+xfs_agblock_t xfs_ibt_block(struct xfs_mount *mp);
+#define        XFS_IBT_BLOCK(mp)               xfs_ibt_block(mp)
+#else
+#define        XFS_IBT_BLOCK(mp)       ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_PREALLOC_BLOCKS)
+xfs_agblock_t xfs_prealloc_blocks(struct xfs_mount *mp);
+#define        XFS_PREALLOC_BLOCKS(mp)         xfs_prealloc_blocks(mp)
+#else
+#define        XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#endif
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_REC_ADDR)
+xfs_inobt_rec_t *
+xfs_inobt_rec_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_REC_ADDR(bb,i,cur)    xfs_inobt_rec_addr(bb,i,cur)
+#else
+#define        XFS_INOBT_REC_ADDR(bb,i,cur)    \
+       XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, i, \
+               XFS_INOBT_BLOCK_MAXRECS(0, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_KEY_ADDR)
+xfs_inobt_key_t *
+xfs_inobt_key_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_KEY_ADDR(bb,i,cur)    xfs_inobt_key_addr(bb,i,cur)
+#else
+#define        XFS_INOBT_KEY_ADDR(bb,i,cur)    \
+       XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
+               XFS_INOBT_BLOCK_MAXRECS(1, cur))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_PTR_ADDR)
+xfs_inobt_ptr_t *
+xfs_inobt_ptr_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur);
+#define        XFS_INOBT_PTR_ADDR(bb,i,cur)    xfs_inobt_ptr_addr(bb,i,cur)
+#else
+#define        XFS_INOBT_PTR_ADDR(bb,i,cur)    \
+       XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \
+               XFS_INOBT_BLOCK_MAXRECS(1, cur))
+#endif
+
+/*
+ * Prototypes for externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_inobt_decrement(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat); /* success/failure */
+
+#ifdef _NOTYET_
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int                                    /* error */
+xfs_inobt_delete(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     *stat); /* success/failure */
+#endif /* _NOTYET_ */
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_inobt_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             *ino,   /* output: starting inode of chunk */
+       __int32_t               *fcnt,  /* output: number of free inodes */
+       xfs_inofree_t           *free,  /* output: free inode mask */
+       int                     *stat,  /* output: success/failure */
+       xfs_arch_t              arch);  /* input: architecture */
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_inobt_increment(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat); /* success/failure */
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int                                    /* error */
+xfs_inobt_insert(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the record equal to ino in the btree given by cur.
+ */
+int                                    /* error */
+xfs_inobt_lookup_eq(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the first record greater than or equal to ino
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_inobt_lookup_ge(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat); /* success/failure */
+
+/*
+ * Lookup the first record less than or equal to ino
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_inobt_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free,   /* free inode mask */
+       int                     *stat); /* success/failure */
+/*
+ * Update the record referred to by cur, to the value given
+ * by [ino, fcnt, free].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int                                    /* error */
+xfs_inobt_update(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free);  /* free inode mask */
+
+#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/include/xfs_imap.h b/include/xfs_imap.h
new file mode 100644 (file)
index 0000000..54b58d6
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IMAP_H__
+#define        __XFS_IMAP_H__
+
+/*
+ * This is the structure passed to xfs_imap() to map
+ * an inode number to its on disk location.
+ */
+typedef struct xfs_imap {
+       xfs_daddr_t     im_blkno;       /* starting BB of inode chunk */
+       uint            im_len;         /* length in BBs of inode chunk */
+       xfs_agblock_t   im_agblkno;     /* logical block of inode chunk in ag */
+       ushort          im_ioffset;     /* inode offset in block in "inodes" */
+       ushort          im_boffset;     /* inode offset in block in bytes */
+} xfs_imap_t;
+       
+#ifdef __KERNEL__
+struct xfs_mount;
+struct xfs_trans;
+int    xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+                xfs_imap_t *, uint);
+#endif
+
+#endif /* __XFS_IMAP_H__ */
diff --git a/include/xfs_inode.h b/include/xfs_inode.h
new file mode 100644 (file)
index 0000000..742ca12
--- /dev/null
@@ -0,0 +1,615 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_INODE_H__
+#define        __XFS_INODE_H__
+
+/*
+ * File incore extent information, present for each of data & attr forks.
+ */
+#define        XFS_INLINE_EXTS 2
+#define        XFS_INLINE_DATA 32
+typedef struct xfs_ifork {
+       int                     if_bytes;       /* bytes in if_u1 */
+       int                     if_real_bytes;  /* bytes allocated in if_u1 */
+       xfs_bmbt_block_t        *if_broot;      /* file's incore btree root */
+       short                   if_broot_bytes; /* bytes allocated for root */
+       unsigned char           if_flags;       /* per-fork flags */
+       unsigned char           if_ext_max;     /* max # of extent records */
+       xfs_extnum_t            if_lastex;      /* last if_extents used */
+       union {
+               xfs_bmbt_rec_t  *if_extents;    /* linear map file exts */
+               char            *if_data;       /* inline file data */
+       } if_u1;
+       union {
+               xfs_bmbt_rec_t  if_inline_ext[XFS_INLINE_EXTS];
+                                               /* very small file extents */
+               char            if_inline_data[XFS_INLINE_DATA];
+                                               /* very small file data */
+               xfs_dev_t       if_rdev;        /* dev number if special */
+               uuid_t          if_uuid;        /* mount point value */
+       } if_u2;
+} xfs_ifork_t;
+
+/*
+ * Flags for xfs_ichgtime().
+ */
+#define        XFS_ICHGTIME_MOD        0x1     /* data fork modification timestamp */
+#define        XFS_ICHGTIME_ACC        0x2     /* data fork access timestamp */
+#define        XFS_ICHGTIME_CHG        0x4     /* inode field change timestamp */
+
+/*
+ * Per-fork incore inode flags.
+ */
+#define        XFS_IFINLINE    0x0001  /* Inline data is read in */
+#define        XFS_IFEXTENTS   0x0002  /* All extent pointers are read in */
+#define        XFS_IFBROOT     0x0004  /* if_broot points to the bmap b-tree root */
+
+/*
+ * Flags for xfs_imap() and xfs_dilocate().
+ */
+#define        XFS_IMAP_LOOKUP         0x1
+
+/*
+ * Maximum number of extent pointers in if_u1.if_extents.
+ */
+#define        XFS_MAX_INCORE_EXTENTS  32768
+
+
+#ifdef __KERNEL__
+struct bhv_desc;
+struct cred;
+struct ktrace;
+struct vnode;
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_bmbt_irec;
+struct xfs_bmbt_block;
+struct xfs_ext_attr;
+struct xfs_inode;
+struct xfs_inode_log_item;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot;
+struct pm;
+
+
+/*
+ * This structure is used to communicate which extents of a file
+ * were holes when a write started from xfs_write_file() to
+ * xfs_strat_read().  This is necessary so that we can know which
+ * blocks need to be zeroed when they are read in in xfs_strat_read()
+ * if they weren't allocated when the buffer given to xfs_strat_read()
+ * was mapped.
+ *
+ * We keep a list of these attached to the inode.  The list is
+ * protected by the inode lock and the fact that the io lock is
+ * held exclusively by writers.
+ */
+typedef struct xfs_gap {
+       struct xfs_gap  *xg_next;
+       xfs_fileoff_t   xg_offset_fsb;
+       xfs_extlen_t    xg_count_fsb;
+} xfs_gap_t;
+
+/*
+ * This structure is used to hold common pieces of the buffer
+ * and file for xfs_dio_write and xfs_dio_read.
+ */
+typedef        struct xfs_dio {
+       struct xfs_buf  *xd_bp;
+       bhv_desc_t      *xd_bdp;
+       struct xfs_inode *xd_ip;
+       struct xfs_iocore *xd_io;
+       struct cred     *xd_cr;
+       struct pm       *xd_pmp;
+       int             xd_blkalgn;
+       int             xd_ioflag;
+       xfs_off_t               xd_start;
+       size_t          xd_length;
+} xfs_dio_t;
+
+
+typedef struct xfs_iocore {
+       void                    *io_obj;        /* pointer to container
+                                                * inode or dcxvn structure */
+       struct xfs_mount        *io_mount;      /* fs mount struct ptr */
+       mrlock_t                *io_lock;       /* inode lock */
+       mrlock_t                *io_iolock;     /* inode IO lock */
+       sema_t                  *io_flock;      /* inode flush lock */
+       mutex_t                 io_rlock;       /* inode readahead mutex */
+
+       /* I/O state */
+       xfs_off_t               io_offset;      /* last buf offset */
+       xfs_off_t               io_next_offset; /* seq read detector */
+       unsigned int            io_last_req_sz; /* last read size */
+       unsigned int            io_size;        /* file io buffer len */
+       xfs_fsize_t             io_new_size;    /* sz when write completes */
+       xfs_off_t               io_write_offset;
+                                               /* start off of curr write */
+       xfs_fileoff_t           io_reada_blkno; /* next blk to start ra */
+       xfs_gap_t               *io_gap_list;   /* hole list in write range */
+       unsigned int            io_readio_blocks;       /* read buffer size */
+       unsigned int            io_writeio_blocks;      /* write buffer size */
+       uchar_t                 io_readio_log;  /* log2 of read buffer size */
+       uchar_t                 io_writeio_log; /* log2 of write buffer size */
+       uchar_t                 io_max_io_log;  /* max r/w io value */
+       int                     io_queued_bufs; /* count of xfsd queued bufs*/
+
+       /* Miscellaneous state. */
+       unsigned int            io_flags;       /* IO related flags */
+
+       /* DMAPI state */
+       __uint32_t      io_dmevmask;    /* DMIG event mask */
+       __uint16_t      io_dmstate;     /* DMIG state info */
+} xfs_iocore_t;
+
+#define XFS_IO_INODE(io)       ((xfs_inode_t *) ((io)->io_obj))
+#define XFS_IO_DCXVN(io)       ((dcxvn_t *) ((io)->io_obj))
+
+/*
+ * Flags in the flags field
+ */
+
+#define XFS_IOCORE_ISXFS       0x01
+#define XFS_IOCORE_ISCXFS      0x02
+#define XFS_IOCORE_RT          0x04
+#define XFS_IOCORE_UIOSZ       0x08
+
+#define IO_IS_XFS(io)  ((io)->io_flags & XFS_IOCORE_ISXFS)
+
+/*
+ * Clear out the read-ahead state in the in-core inode.
+ * We actually only need to clear io_next_offset and
+ * io_last_req_sz to get the effect of making all the
+ * read ahead state unusable.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INODE_CLEAR_READ_AHEAD)
+void xfs_inode_clear_read_ahead(xfs_iocore_t *io);
+#define XFS_INODE_CLEAR_READ_AHEAD(io)          xfs_inode_clear_read_ahead(io)
+#else  /* do/while(0) makes the macro a single statement, like the function form */
+#define XFS_INODE_CLEAR_READ_AHEAD(io)  do {    \
+               mutex_lock(&((io)->io_rlock), PINOD);    \
+               (io)->io_next_offset = 0;          \
+               (io)->io_last_req_sz = 0;          \
+               mutex_unlock(&((io)->io_rlock)); } while (0)
+#endif
+
+
+/*
+ * xfs_iocore prototypes
+ */
+
+extern void xfs_iocore_inode_init(struct xfs_inode *);
+extern void xfs_iocore_inode_reinit(struct xfs_inode *);
+extern void xfs_iocore_reset(xfs_iocore_t *);
+extern void xfs_iocore_destroy(xfs_iocore_t *);
+
+
+/*
+ * This is the type used in the xfs inode hash table.
+ * An array of these is allocated for each mounted
+ * file system to hash the inodes for that file system.
+ */
+typedef struct xfs_ihash {
+       struct xfs_inode        *ih_next;       
+       mrlock_t                ih_lock;
+       uint                    ih_version;
+} xfs_ihash_t;
+#if defined(MP)
+#pragma set type attribute xfs_ihash align=128
+#endif
+
+/*
+ * Inode hashing and hash bucket locking.
+ */
+#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1)
+#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
+
+/*
+ * This is the xfs inode cluster hash.  This hash is used by xfs_iflush to
+ * find inodes that share a cluster and can be flushed to disk at the same
+ * time.
+ */
+
+typedef struct xfs_chashlist {
+       struct xfs_chashlist    *chl_next;
+       struct xfs_inode        *chl_ip;
+       xfs_daddr_t             chl_blkno;      /* starting block number of 
+                                                * the cluster */
+#ifdef DEBUG
+       struct xfs_buf          *chl_buf;       /* debug: the inode buffer */
+#endif
+} xfs_chashlist_t;
+
+typedef struct xfs_chash {
+       xfs_chashlist_t         *ch_list;
+       lock_t                  ch_lock;
+} xfs_chash_t;
+
+
+/*
+ * This is the xfs in-core inode structure.
+ * Most of the on-disk inode is embedded in the i_d field.
+ *
+ * The extent pointers/inline file space, however, are managed
+ * separately.  The memory for this information is pointed to by
+ * the if_u1 unions depending on the type of the data.
+ * This is used to linearize the array of extents for fast in-core
+ * access.  This is used until the file's number of extents
+ * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
+ * are accessed through the buffer cache.
+ *
+ * Other state kept in the in-core inode is used for identification,
+ * locking, transactional updating, etc of the inode.
+ *
+ * Generally, we do not want to hold the i_rlock while holding the 
+ * i_ilock. Hierarchy is i_iolock followed by i_rlock. 
+ *
+ * xfs_iptr_t contains all the inode fields up to and including the
+ * i_mount field (so it covers i_mnext and i_mprev); it is used as a
+ * marker in the inode chain off the mount structure by xfs_sync calls.
+ */
+
+typedef struct {
+       struct xfs_ihash        *ip_hash;       /* pointer to hash header */
+       struct xfs_inode        *ip_next;       /* inode hash link forw */
+       struct xfs_inode        *ip_mnext;      /* next inode in mount list */
+       struct xfs_inode        *ip_mprev;      /* ptr to prev inode */
+       struct xfs_inode        **ip_prevp;     /* ptr to prev i_next */
+       struct xfs_mount        *ip_mount;      /* fs mount struct ptr */
+} xfs_iptr_t;
+
+typedef struct xfs_inode {
+       /* Inode linking and identification information. */
+       struct xfs_ihash        *i_hash;        /* pointer to hash header */
+       struct xfs_inode        *i_next;        /* inode hash link forw */
+       struct xfs_inode        *i_mnext;       /* next inode in mount list */
+       struct xfs_inode        *i_mprev;       /* ptr to prev inode */
+       struct xfs_inode        **i_prevp;      /* ptr to prev i_next */
+       struct xfs_mount        *i_mount;       /* fs mount struct ptr */
+       struct bhv_desc         i_bhv_desc;     /* inode behavior descriptor*/
+       struct xfs_dquot        *i_udquot;      /* user dquot */
+       struct xfs_dquot        *i_pdquot;      /* project dquot */
+
+       /* Inode location stuff */
+       xfs_ino_t               i_ino;          /* inode number (agno/agino)*/
+       xfs_daddr_t             i_blkno;        /* blkno of inode buffer */
+       dev_t                   i_dev;          /* dev for this inode */
+       ushort                  i_len;          /* len of inode buffer */
+       ushort                  i_boffset;      /* off of inode in buffer */
+
+       /* Extent information. */
+       xfs_ifork_t             *i_afp;         /* attribute fork pointer */
+       xfs_ifork_t             i_df;           /* data fork */
+
+       /* Transaction and locking information. */
+       struct xfs_trans        *i_transp;      /* ptr to owning transaction*/
+       struct xfs_inode_log_item *i_itemp;     /* logging information */
+       mrlock_t                i_lock;         /* inode lock */
+       mrlock_t                i_iolock;       /* inode IO lock */
+       sema_t                  i_flock;        /* inode flush lock */
+       unsigned int            i_pincount;     /* inode pin count */
+       sv_t                    i_pinsema;      /* inode pin sema */
+       lock_t                  i_ipinlock;     /* inode pinning mutex */
+       struct xfs_inode        *i_release;     /* inode to unref */
+
+       /* I/O state */
+       xfs_iocore_t            i_iocore;       /* I/O core */
+
+       /* Miscellaneous state. */
+       unsigned short          i_flags;        /* see defined flags below */
+       unsigned short          i_update_core;  /* timestamps/size is dirty */
+       unsigned short          i_update_size;  /* di_size field is dirty */
+       unsigned int            i_gen;          /* generation count */
+       unsigned int            i_delayed_blks; /* count of delay alloc blks */
+       struct xfs_ext_attr     *i_ext_attr;    /* Critical ext attributes */
+       void                    *i_ilock_ra;    /* current ilock ret addr */
+
+       xfs_dinode_core_t       i_d;            /* most of ondisk inode */
+       xfs_chashlist_t         *i_chash;       /* cluster hash list header */
+       struct xfs_inode        *i_cnext;       /* cluster hash link forward */
+       struct xfs_inode        *i_cprev;       /* cluster hash link backward */
+
+#ifdef DEBUG
+       /* Trace buffers per inode. */
+       struct ktrace           *i_xtrace;      /* inode extent list trace */
+       struct ktrace           *i_btrace;      /* inode bmap btree trace */
+       struct ktrace           *i_rwtrace;     /* inode read/write trace */
+       struct ktrace           *i_strat_trace; /* inode strat_write trace */
+       struct ktrace           *i_lock_trace;  /* inode lock/unlock trace */
+       struct ktrace           *i_dir_trace;   /* inode directory trace */
+#endif /* DEBUG */
+} xfs_inode_t;
+
+#endif /* __KERNEL__ */
+
+
+/*
+ * Fork handling.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_PTR)
+xfs_ifork_t *xfs_ifork_ptr(xfs_inode_t *ip, int w);
+#define        XFS_IFORK_PTR(ip,w)             xfs_ifork_ptr(ip,w)
+#else
+#define        XFS_IFORK_PTR(ip,w)   ((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_Q)
+int xfs_ifork_q(xfs_inode_t *ip);
+#define        XFS_IFORK_Q(ip)                 xfs_ifork_q(ip)
+#else
+#define        XFS_IFORK_Q(ip)                 XFS_CFORK_Q(&(ip)->i_d)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_DSIZE)
+int xfs_ifork_dsize(xfs_inode_t *ip);
+#define        XFS_IFORK_DSIZE(ip)             xfs_ifork_dsize(ip)
+#else  /* inline form: forwards the inode's i_d core and i_mount */
+#define        XFS_IFORK_DSIZE(ip)             XFS_CFORK_DSIZE(&(ip)->i_d, (ip)->i_mount)
+#endif /* XFS_IFORK_DSIZE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_ASIZE)
+int xfs_ifork_asize(xfs_inode_t *ip);
+#define        XFS_IFORK_ASIZE(ip)             xfs_ifork_asize(ip)
+#else  /* inline form: forwards the inode's i_d core and i_mount */
+#define        XFS_IFORK_ASIZE(ip)             XFS_CFORK_ASIZE(&(ip)->i_d, (ip)->i_mount)
+#endif /* XFS_IFORK_ASIZE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_SIZE)
+int xfs_ifork_size(xfs_inode_t *ip, int w);
+#define        XFS_IFORK_SIZE(ip,w)            xfs_ifork_size(ip,w)
+#else  /* inline form: forwards i_d, i_mount and the fork selector w */
+#define        XFS_IFORK_SIZE(ip,w)            XFS_CFORK_SIZE(&(ip)->i_d, (ip)->i_mount, w)
+#endif /* XFS_IFORK_SIZE */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FORMAT)
+int xfs_ifork_format(xfs_inode_t *ip, int w);
+#define        XFS_IFORK_FORMAT(ip,w)          xfs_ifork_format(ip,w)
+#else  /* inline form: forwards the inode's i_d core and w */
+#define        XFS_IFORK_FORMAT(ip,w)          XFS_CFORK_FORMAT(&(ip)->i_d, w)
+#endif /* XFS_IFORK_FORMAT */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FMT_SET)
+void xfs_ifork_fmt_set(xfs_inode_t *ip, int w, int n);
+#define        XFS_IFORK_FMT_SET(ip,w,n)       xfs_ifork_fmt_set(ip,w,n)
+#else  /* inline form: forwards the inode's i_d core, w and n */
+#define        XFS_IFORK_FMT_SET(ip,w,n)       XFS_CFORK_FMT_SET(&(ip)->i_d, w, n)
+#endif /* XFS_IFORK_FMT_SET */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXTENTS)
+int xfs_ifork_nextents(xfs_inode_t *ip, int w);
+#define        XFS_IFORK_NEXTENTS(ip,w)        xfs_ifork_nextents(ip,w)
+#else  /* inline form: forwards the inode's i_d core and w */
+#define        XFS_IFORK_NEXTENTS(ip,w)        XFS_CFORK_NEXTENTS(&(ip)->i_d, w)
+#endif /* XFS_IFORK_NEXTENTS */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXT_SET)
+void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
+#define        XFS_IFORK_NEXT_SET(ip,w,n)      xfs_ifork_next_set(ip,w,n)
+#else  /* inline form: forwards the inode's i_d core, w and n */
+#define        XFS_IFORK_NEXT_SET(ip,w,n)      XFS_CFORK_NEXT_SET(&(ip)->i_d, w, n)
+#endif /* XFS_IFORK_NEXT_SET */
+
+
+#ifdef __KERNEL__
+
+/*
+ * In-core inode flags.
+ */
+#define XFS_IGRIO      0x0001  /* inode used for guaranteed rate i/o */
+#define XFS_IUIOSZ     0x0002  /* inode i/o sizes have been explicitly set */
+#define XFS_IQUIESCE    0x0004  /* we have started quiescing for this inode */
+#define XFS_IRECLAIM    0x0008  /* we have started reclaiming this inode    */
+
+/*
+ * Flags for inode locking.
+ */
+#define        XFS_IOLOCK_EXCL         0x001
+#define        XFS_IOLOCK_SHARED       0x002
+#define        XFS_ILOCK_EXCL          0x004
+#define        XFS_ILOCK_SHARED        0x008
+#define        XFS_IUNLOCK_NONOTIFY    0x010
+#define XFS_IOLOCK_NESTED      0x020
+#define XFS_EXTENT_TOKEN_RD    0x040
+#define XFS_SIZE_TOKEN_RD      0x080
+#define XFS_EXTSIZE_RD         (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
+#define XFS_WILLLEND           0x100   /* Always acquire tokens for lending */
+#define XFS_EXTENT_TOKEN_WR    (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
+#define XFS_SIZE_TOKEN_WR       (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
+#define XFS_EXTSIZE_WR         (XFS_EXTSIZE_RD | XFS_WILLLEND)
+
+
+#define XFS_LOCK_MASK  \
+       (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
+        XFS_IOLOCK_NESTED | \
+        XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \
+        XFS_WILLLEND)
+
+/*
+ * Flags for xfs_iflush()
+ */
+#define        XFS_IFLUSH_DELWRI_ELSE_SYNC     1
+#define        XFS_IFLUSH_DELWRI_ELSE_ASYNC    2
+#define        XFS_IFLUSH_SYNC                 3
+#define        XFS_IFLUSH_ASYNC                4
+#define        XFS_IFLUSH_DELWRI               5
+
+/*
+ * Flags for xfs_iflush_all.
+ */
+#define        XFS_FLUSH_ALL           0x1
+
+/*
+ * Flags for xfs_itruncate_start().
+ */
+#define        XFS_ITRUNC_DEFINITE     0x1
+#define        XFS_ITRUNC_MAYBE        0x2
+
+/*
+ * Maximum file size.
+ * if XFS_BIG_FILES 2^63 - 1 (largest positive value of xfs_fsize_t)
+ * else 2^40 - 1 (40=31+9) (might be an int holding a block #)
+ * Note, we allow seeks to this offset, although you can't read or write.
+ * For the not XFS_BIG_FILES case, the value could be 1 higher but we don't
+ * do that, for symmetry.
+ */
+#if XFS_BIG_FILES
+#define XFS_MAX_FILE_OFFSET    ((long long)((1ULL<<63)-1ULL))
+#else
+#define        XFS_MAX_FILE_OFFSET     ((1LL<<40)-1LL)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOV)
+struct vnode *xfs_itov(xfs_inode_t *ip);
+#define        XFS_ITOV(ip)            xfs_itov(ip)
+#else
+#define        XFS_ITOV(ip)            BHV_TO_VNODE(XFS_ITOBHV(ip))
+#endif
+#define        XFS_ITOV_NULL(ip)       BHV_TO_VNODE_NULL(XFS_ITOBHV(ip))
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ITOBHV)
+struct bhv_desc *xfs_itobhv(xfs_inode_t *ip);
+#define        XFS_ITOBHV(ip)          xfs_itobhv(ip)
+#else
+#define        XFS_ITOBHV(ip)          ((struct bhv_desc *)(&((ip)->i_bhv_desc)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOI)
+xfs_inode_t *xfs_bhvtoi(struct bhv_desc *bhvp);
+#define        XFS_BHVTOI(bhvp)        xfs_bhvtoi(bhvp)
+#else
+#define        XFS_BHVTOI(bhvp)        \
+       ((xfs_inode_t *)((char *)(bhvp) - \
+                        (char *)&(((xfs_inode_t *)0)->i_bhv_desc)))
+#endif
+
+#define BHV_IS_XFS(bdp)                (BHV_OPS(bdp) == &xfs_vnodeops)
+
+/*
+ * Pick the inode cluster hash bucket: (uint)(blk) modulo m_chsize
+ * (m_chash is the same size as m_ihash)
+ */
+#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)(blk)) % (mp)->m_chsize))
+
+
+/*
+ * xfs_iget.c prototypes.
+ */
+void           xfs_ihash_init(struct xfs_mount *);
+void           xfs_ihash_free(struct xfs_mount *);
+void           xfs_chash_init(struct xfs_mount *);
+void           xfs_chash_free(struct xfs_mount *);
+xfs_inode_t    *xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
+                                 struct xfs_trans *);
+void            xfs_inode_lock_init(xfs_inode_t *, struct vnode *);
+int            xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+                        uint, xfs_inode_t **, xfs_daddr_t);
+int            xfs_vn_iget(struct vnode    *, struct xfs_mount *,
+                        struct xfs_trans *, xfs_ino_t,
+                        uint, xfs_inode_t **, xfs_daddr_t);
+void           xfs_iput(xfs_inode_t *, uint);
+void           xfs_ilock(xfs_inode_t *, uint);
+int            xfs_ilock_nowait(xfs_inode_t *, uint);
+void           xfs_iunlock(xfs_inode_t *, uint);
+void           xfs_ilock_demote(xfs_inode_t *, uint);
+void           xfs_iflock(xfs_inode_t *);
+int            xfs_iflock_nowait(xfs_inode_t *);
+uint           xfs_ilock_map_shared(xfs_inode_t *);
+void           xfs_iunlock_map_shared(xfs_inode_t *, uint);
+void           xfs_ifunlock(xfs_inode_t *);
+void           xfs_ireclaim(xfs_inode_t *);
+int            xfs_finish_reclaim(xfs_inode_t *, int);
+
+/*
+ * xfs_inode.c prototypes.
+ */
+int            xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+                           xfs_dinode_t **, struct xfs_buf **, int *);
+int            xfs_itobp(struct xfs_mount *, struct xfs_trans *,
+                         xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
+                         xfs_daddr_t);
+int            xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
+                         xfs_inode_t **, xfs_daddr_t);
+int            xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
+int            xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
+                          dev_t, struct cred *, xfs_prid_t, int,
+                          struct xfs_buf **, boolean_t *, xfs_inode_t **);
+void           xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
+                          xfs_arch_t);
+int            xfs_ifree(struct xfs_trans *, xfs_inode_t *);
+int            xfs_atruncate_start(xfs_inode_t *);
+void           xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
+int            xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
+                                    xfs_fsize_t, int, int);
+int            xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
+int            xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
+void           xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
+                                xfs_fsize_t, int);
+
+void           xfs_idestroy_fork(xfs_inode_t *, int);
+void           xfs_idestroy(xfs_inode_t *);
+void           xfs_idata_realloc(xfs_inode_t *, int, int);
+void           xfs_iextract(xfs_inode_t *);
+void           xfs_iext_realloc(xfs_inode_t *, int, int);
+void           xfs_iroot_realloc(xfs_inode_t *, int, int);
+void           xfs_ipin(xfs_inode_t *);
+void           xfs_iunpin(xfs_inode_t *);
+unsigned int   xfs_ipincount(xfs_inode_t *);
+int            xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int            xfs_iflush(xfs_inode_t *, uint);
+int            xfs_iflush_all(struct xfs_mount *, int);
+int             xfs_ibusy_check(xfs_inode_t *, int);
+int            xfs_iaccess(xfs_inode_t *, mode_t);
+uint           xfs_iroundup(uint);
+void           xfs_ichgtime(xfs_inode_t *, int);
+xfs_fsize_t    xfs_file_last_byte(xfs_inode_t *);
+xfs_inode_t    *xfs_get_inode(dev_t, xfs_ino_t);
+void           xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+
+
+#ifdef DEBUG
+void           xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
+#else  /* DEBUG */
+#define xfs_isize_check(mp, ip, isize)
+#endif /* DEBUG */
+
+#if defined(DEBUG)
+void           xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
+#else
+#define        xfs_inobp_check(mp, bp)
+#endif /* DEBUG */
+
+extern struct xfs_zone *xfs_chashlist_zone;
+extern struct xfs_zone *xfs_ifork_zone;
+extern struct xfs_zone *xfs_inode_zone;
+extern struct xfs_zone *xfs_ili_zone;
+extern struct vnodeops xfs_vnodeops;
+
+#ifdef XFS_ILOCK_TRACE
+#define XFS_ILOCK_KTRACE_SIZE  32
+void   xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, 
+                       inst_t *ra);
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_INODE_H__ */
diff --git a/include/xfs_inode_item.h b/include/xfs_inode_item.h
new file mode 100644 (file)
index 0000000..d3433aa
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_INODE_ITEM_H__
+#define        __XFS_INODE_ITEM_H__
+
+/*
+ * This is the structure used to lay out an inode log item in the
+ * log.  The size of the inline data/extents/b-tree root to be logged
+ * (if any) is indicated in the ilf_dsize field.  Changes to this structure
+ * must be added on to the end.
+ *
+ * Convention for naming inode log item versions :  The current version
+ * is always named XFS_LI_INODE.  When an inode log item gets superseded,
+ * add the latest version of IRIX that will generate logs with that item
+ * to the version name.
+ *
+ * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first
+ *     union (ilf_u) field.  This was released with IRIX 5.3-XFS.
+ * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire
+ *     structure.  This was released with IRIX 6.0.1-XFS and IRIX 6.1.
+ * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2
+ *     so a new structure definition wasn't necessary.  However, we had
+ *     to add a new type because the inode cluster size changed from 4K
+ *     to 8K and the version number had to be rev'ved to keep older kernels
+ *     from trying to recover logs with the 8K buffers in them.  The logging
+ *     code can handle recovery on different-sized clusters now so hopefully
+ *     this'll be the last time we need to change the inode log item just
+ *     for a change in the inode cluster size.  This new version was
+ *     released with IRIX 6.2.
+ */
+typedef struct xfs_inode_log_format {
+       unsigned short          ilf_type;       /* inode log item type */
+       unsigned short          ilf_size;       /* size of this item */
+       uint                    ilf_fields;     /* flags for fields logged */
+       ushort                  ilf_asize;      /* size of attr d/ext/root */
+       ushort                  ilf_dsize;      /* size of data/ext/root */
+       xfs_ino_t               ilf_ino;        /* inode number */
+       union {
+               xfs_dev_t       ilfu_rdev;      /* rdev value for dev inode*/
+               uuid_t          ilfu_uuid;      /* mount point value */
+       } ilf_u;
+       __int64_t               ilf_blkno;      /* blkno of inode buffer */
+       int                     ilf_len;        /* len of inode buffer */
+       int                     ilf_boffset;    /* off of inode in buffer */
+} xfs_inode_log_format_t;
+
+/* Initial version shipped with IRIX 5.3-XFS */
+typedef struct xfs_inode_log_format_v1 {
+       unsigned short          ilf_type;       /* inode log item type */
+       unsigned short          ilf_size;       /* size of this item */
+       uint                    ilf_fields;     /* flags for fields logged */
+       uint                    ilf_dsize;      /* size of data/ext/root */
+       xfs_ino_t               ilf_ino;        /* inode number */
+       union {
+               xfs_dev_t       ilfu_rdev;      /* rdev value for dev inode*/
+               uuid_t          ilfu_uuid;      /* mount point value */
+       } ilf_u;
+} xfs_inode_log_format_t_v1;
+
+/*
+ * Flags for xfs_trans_log_inode flags field.
+ */
+#define        XFS_ILOG_CORE   0x001   /* log standard inode fields */
+#define        XFS_ILOG_DDATA  0x002   /* log i_df.if_data */
+#define        XFS_ILOG_DEXT   0x004   /* log i_df.if_extents */
+#define        XFS_ILOG_DBROOT 0x008   /* log i_df.i_broot */
+#define        XFS_ILOG_DEV    0x010   /* log the dev field */
+#define        XFS_ILOG_UUID   0x020   /* log the uuid field */
+#define        XFS_ILOG_ADATA  0x040   /* log i_afp->if_data */
+#define        XFS_ILOG_AEXT   0x080   /* log i_afp->if_extents */
+#define        XFS_ILOG_ABROOT 0x100   /* log i_afp->i_broot */
+
+#define        XFS_ILOG_NONCORE        (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+                                XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
+                                XFS_ILOG_UUID | XFS_ILOG_ADATA | \
+                                XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
+
+#define        XFS_ILOG_DFORK          (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+                                XFS_ILOG_DBROOT)
+
+#define        XFS_ILOG_AFORK          (XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+                                XFS_ILOG_ABROOT)
+
+#define        XFS_ILOG_ALL            (XFS_ILOG_CORE | XFS_ILOG_DDATA | \
+                                XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
+                                XFS_ILOG_DEV | XFS_ILOG_UUID | \
+                                XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+                                XFS_ILOG_ABROOT)
+
+#define        XFS_ILI_HOLD            0x1
+#define        XFS_ILI_IOLOCKED_EXCL   0x2
+#define        XFS_ILI_IOLOCKED_SHARED 0x4
+
+#define        XFS_ILI_IOLOCKED_ANY   (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
+
+
+#ifdef __KERNEL__
+
+struct xfs_buf;
+struct xfs_bmbt_rec_32;
+struct xfs_inode;
+struct xfs_mount;
+
+
+typedef struct xfs_inode_log_item {
+       xfs_log_item_t          ili_item;          /* common portion */
+       struct xfs_inode        *ili_inode;        /* inode ptr */
+       xfs_lsn_t               ili_flush_lsn;     /* lsn at last flush */
+       xfs_lsn_t               ili_last_lsn;      /* lsn at last transaction */
+       unsigned short          ili_ilock_recur;   /* lock recursion count */
+       unsigned short          ili_iolock_recur;  /* lock recursion count */
+       unsigned short          ili_flags;         /* misc flags */
+       unsigned short          ili_logged;        /* flushed logged data */
+       unsigned int            ili_last_fields;   /* fields when flushed */
+       struct xfs_bmbt_rec_32  *ili_extents_buf;  /* array of logged exts */
+       unsigned int            ili_pushbuf_flag;  /* one bit used in push_ail */
+
+#ifdef DEBUG
+       uint64_t                ili_push_owner;    /* one who sets pushbuf_flag
+                                                     above gets to push the buf */
+#endif
+#ifdef XFS_TRANS_DEBUG
+       int                     ili_root_size;
+       char                    *ili_orig_root;
+#endif
+       xfs_inode_log_format_t  ili_format;        /* logged structure */
+} xfs_inode_log_item_t;
+
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FDATA)
+int xfs_ilog_fdata(int w);
+#define        XFS_ILOG_FDATA(w)       xfs_ilog_fdata(w)
+#else
+#define        XFS_ILOG_FDATA(w)       \
+       ((w) == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA)
+#endif
+
+#endif /* __KERNEL__ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FBROOT)
+int xfs_ilog_fbroot(int w);
+#define        XFS_ILOG_FBROOT(w)      xfs_ilog_fbroot(w)
+#else
+#define        XFS_ILOG_FBROOT(w)      \
+       ((w) == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FEXT)
+int xfs_ilog_fext(int w);
+#define        XFS_ILOG_FEXT(w)        xfs_ilog_fext(w)
+#else
+#define        XFS_ILOG_FEXT(w)        \
+       ((w) == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT)
+#endif
+
+#ifdef __KERNEL__
+
+void   xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
+void   xfs_inode_item_destroy(struct xfs_inode *);
+void   xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
+void   xfs_iflush_abort(struct xfs_inode *);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_INODE_ITEM_H__ */
diff --git a/include/xfs_inum.h b/include/xfs_inum.h
new file mode 100644 (file)
index 0000000..fb3ec3c
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_INUM_H__
+#define        __XFS_INUM_H__
+
+/*
+ * Inode number format:
+ * low inopblog bits - offset in block
+ * next agblklog bits - block number in ag
+ * next agno_log bits - ag number
+ * high agno_log-agblklog-inopblog bits - 0
+ */
+
+typedef        __uint32_t      xfs_agino_t;    /* within allocation grp inode number */
+
+/*
+ * Useful inode bits for this kernel.
+ * Used in some places where having 64-bits in the 32-bit kernels
+ * costs too much.
+ */
+#if XFS_BIG_FILESYSTEMS
+typedef        xfs_ino_t       xfs_intino_t;
+#else
+typedef        __uint32_t      xfs_intino_t;
+#endif
+
+#define        NULLFSINO       ((xfs_ino_t)-1)
+#define        NULLAGINO       ((xfs_agino_t)-1)
+
+struct xfs_mount;
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_MASK)
+__uint32_t xfs_ino_mask(int k);
+#define        XFS_INO_MASK(k)                 xfs_ino_mask(k)
+#else
+#define        XFS_INO_MASK(k) ((__uint32_t)((1ULL << (k)) - 1))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_OFFSET_BITS)
+int xfs_ino_offset_bits(struct xfs_mount *mp);
+#define        XFS_INO_OFFSET_BITS(mp)         xfs_ino_offset_bits(mp)
+#else
+#define        XFS_INO_OFFSET_BITS(mp) ((mp)->m_sb.sb_inopblog)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGBNO_BITS)
+int xfs_ino_agbno_bits(struct xfs_mount *mp);
+#define        XFS_INO_AGBNO_BITS(mp)          xfs_ino_agbno_bits(mp)
+#else
+#define        XFS_INO_AGBNO_BITS(mp)  ((mp)->m_sb.sb_agblklog)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGINO_BITS)
+int xfs_ino_agino_bits(struct xfs_mount *mp);
+#define        XFS_INO_AGINO_BITS(mp)          xfs_ino_agino_bits(mp)
+#else
+#define        XFS_INO_AGINO_BITS(mp)          ((mp)->m_agino_log)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGNO_BITS)
+int xfs_ino_agno_bits(struct xfs_mount *mp);
+#define        XFS_INO_AGNO_BITS(mp)           xfs_ino_agno_bits(mp)
+#else
+#define        XFS_INO_AGNO_BITS(mp)   ((mp)->m_agno_log)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_BITS)
+int xfs_ino_bits(struct xfs_mount *mp);
+#define        XFS_INO_BITS(mp)                xfs_ino_bits(mp)
+#else
+#define        XFS_INO_BITS(mp)        (XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGNO)
+xfs_agnumber_t xfs_ino_to_agno(struct xfs_mount *mp, xfs_ino_t i);
+#define        XFS_INO_TO_AGNO(mp,i)           xfs_ino_to_agno(mp,i)
+#else
+#define        XFS_INO_TO_AGNO(mp,i)   \
+       ((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGINO)
+xfs_agino_t xfs_ino_to_agino(struct xfs_mount *mp, xfs_ino_t i);
+#define        XFS_INO_TO_AGINO(mp,i)          xfs_ino_to_agino(mp,i)
+#else
+#define        XFS_INO_TO_AGINO(mp,i)  \
+       ((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGBNO)
+xfs_agblock_t xfs_ino_to_agbno(struct xfs_mount *mp, xfs_ino_t i);
+#define        XFS_INO_TO_AGBNO(mp,i)          xfs_ino_to_agbno(mp,i)
+#else
+#define        XFS_INO_TO_AGBNO(mp,i)  \
+       (((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \
+        XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_OFFSET)
+int xfs_ino_to_offset(struct xfs_mount *mp, xfs_ino_t i);
+#define        XFS_INO_TO_OFFSET(mp,i)         xfs_ino_to_offset(mp,i)
+#else
+#define        XFS_INO_TO_OFFSET(mp,i) \
+       ((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_FSB)
+xfs_fsblock_t xfs_ino_to_fsb(struct xfs_mount *mp, xfs_ino_t i);
+#define        XFS_INO_TO_FSB(mp,i)            xfs_ino_to_fsb(mp,i)
+#else
+#define        XFS_INO_TO_FSB(mp,i)    \
+       XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_INO)
+xfs_ino_t
+xfs_agino_to_ino(struct xfs_mount *mp, xfs_agnumber_t a, xfs_agino_t i);
+#define        XFS_AGINO_TO_INO(mp,a,i)        xfs_agino_to_ino(mp,a,i)
+#else
+#define        XFS_AGINO_TO_INO(mp,a,i)        \
+       (((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_AGBNO)
+xfs_agblock_t xfs_agino_to_agbno(struct xfs_mount *mp, xfs_agino_t i);
+#define        XFS_AGINO_TO_AGBNO(mp,i)        xfs_agino_to_agbno(mp,i)
+#else
+#define        XFS_AGINO_TO_AGBNO(mp,i)        ((i) >> XFS_INO_OFFSET_BITS(mp))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_OFFSET)
+int xfs_agino_to_offset(struct xfs_mount *mp, xfs_agino_t i);
+#define        XFS_AGINO_TO_OFFSET(mp,i)       xfs_agino_to_offset(mp,i)
+#else
+#define        XFS_AGINO_TO_OFFSET(mp,i)       \
+       ((i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_OFFBNO_TO_AGINO)
+xfs_agino_t xfs_offbno_to_agino(struct xfs_mount *mp, xfs_agblock_t b, int o);
+#define        XFS_OFFBNO_TO_AGINO(mp,b,o)     xfs_offbno_to_agino(mp,b,o)
+#else
+#define        XFS_OFFBNO_TO_AGINO(mp,b,o)     \
+       ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
+#endif
+
+#if XFS_BIG_FILESYSTEMS
+#define        XFS_MAXINUMBER          ((xfs_ino_t)((1ULL << 56) - 1ULL))
+#define        XFS_INO64_OFFSET        ((xfs_ino_t)(1ULL << 32))
+#else
+#define        XFS_MAXINUMBER          ((xfs_ino_t)((1ULL << 32) - 1ULL))
+#endif
+#define        XFS_MAXINUMBER_32       ((xfs_ino_t)((1ULL << 32) - 1ULL))
+
+#endif /* __XFS_INUM_H__ */
diff --git a/include/xfs_log.h b/include/xfs_log.h
new file mode 100644 (file)
index 0000000..c333cef
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_LOG_H__
+#define __XFS_LOG_H__
+
+/*
+ * An LSN is accessed here as an array of two uints.  These macros give the
+ * array index of the cycle word and of the block word.  On a little-endian
+ * host with arch == ARCH_NOCONVERT the cycle is at index 1 and the block at
+ * index 0; in every other case the cycle is at index 0 and the block at 1.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define LSN_FIELD_CYCLE(arch) (((arch)==ARCH_NOCONVERT)?1:0)
+#define LSN_FIELD_BLOCK(arch) (((arch)==ARCH_NOCONVERT)?0:1)
+#else
+#define LSN_FIELD_CYCLE(arch) (0)
+#define LSN_FIELD_BLOCK(arch) (1)
+#endif
+
+/*
+ * get lsn fields
+ *
+ * lsn must be an lvalue (its address is taken); the selected uint is read
+ * through INT_GET() with the given arch.
+ */
+    
+#define CYCLE_LSN(lsn,arch) (INT_GET(((uint *)&(lsn))[LSN_FIELD_CYCLE(arch)], arch))
+#define BLOCK_LSN(lsn,arch) (INT_GET(((uint *)&(lsn))[LSN_FIELD_BLOCK(arch)], arch))
+
+#ifdef __KERNEL__
+/*
+ * Three-way comparison of two LSNs.  The cycle words are compared first and
+ * the block words only break a tie.  Comparing each 32 bit component on its
+ * own means there are no extra endian issues from treating the pair as one
+ * 64 bit number.
+ *
+ * Returns -999 if lsn1 sorts before lsn2, 999 if it sorts after, 0 if equal.
+ */
+static inline xfs_lsn_t        _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2, xfs_arch_t arch)
+{
+       /* most significant part: the cycle number */
+       if (CYCLE_LSN(lsn1, arch) < CYCLE_LSN(lsn2, arch))
+               return -999;
+       if (CYCLE_LSN(lsn1, arch) > CYCLE_LSN(lsn2, arch))
+               return 999;
+
+       /* same cycle: fall back to the block number */
+       if (BLOCK_LSN(lsn1, arch) < BLOCK_LSN(lsn2, arch))
+               return -999;
+       if (BLOCK_LSN(lsn1, arch) > BLOCK_LSN(lsn2, arch))
+               return 999;
+
+       return 0;
+}
+
+/*
+ * LSN comparison wrappers around _lsn_cmp().  The *_DIFF names expand to the
+ * same call as the *_CMP names, so they yield only -999, 0 or 999 and not a
+ * numeric distance.  The forms without _ARCH use ARCH_NOCONVERT.
+ */
+#define        XFS_LSN_CMP_ARCH(x,y,arch)      _lsn_cmp(x, y, arch)
+#define        XFS_LSN_CMP(x,y) XFS_LSN_CMP_ARCH(x,y,ARCH_NOCONVERT)
+#define        XFS_LSN_DIFF_ARCH(x,y,arch)     _lsn_cmp(x, y, arch)
+#define        XFS_LSN_DIFF(x,y) XFS_LSN_DIFF_ARCH(x,y,ARCH_NOCONVERT)
+
+/*
+ * Macros, structures, prototypes for interface to the log manager.
+ */
+
+/*
+ * Flags to xfs_log_mount
+ */
+#define XFS_LOG_RECOVER                0x1
+
+/*
+ * Flags to xfs_log_done()
+ */
+#define XFS_LOG_REL_PERM_RESERV        0x1
+
+
+/*
+ * Flags to xfs_log_reserve()
+ *
+ *     XFS_LOG_SLEEP:   If space is not available, sleep (default)
+ *     XFS_LOG_NOSLEEP: If space is not available, return error
+ *     XFS_LOG_PERM_RESERV: Permanent reservation.  When writes are
+ *             performed against this type of reservation, the reservation
+ *             is not decreased.  Long running transactions should use this.
+ *     XFS_LOG_RESV_ALL: Mask of every non-zero flag above.
+ *
+ * Note: XFS_LOG_SLEEP is 0, so it cannot be tested with '&'; sleeping is
+ * simply the absence of XFS_LOG_NOSLEEP.
+ */
+#define XFS_LOG_SLEEP          0x0
+#define XFS_LOG_NOSLEEP                0x1
+#define XFS_LOG_PERM_RESERV    0x2
+#define XFS_LOG_RESV_ALL       (XFS_LOG_NOSLEEP|XFS_LOG_PERM_RESERV)
+
+
+/*
+ * Flags to xfs_log_force()
+ *
+ *     XFS_LOG_SYNC:   Synchronous force in-core log to disk
+ *     XFS_LOG_FORCE:  Start in-core log write now.
+ *     XFS_LOG_URGE:   Start write within some window of time.
+ *
+ * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set.
+ */
+#define XFS_LOG_SYNC           0x1
+#define XFS_LOG_FORCE          0x2
+#define XFS_LOG_URGE           0x4
+
+#endif /* __KERNEL__ */
+
+
+/* Log Clients */
+#define XFS_TRANSACTION                0x69
+#define XFS_VOLUME             0x2
+#define XFS_LOG                        0xaa
+
+/*
+ * One region of memory described by its start address and its length in
+ * bytes.  xfs_log_write() takes an array of these.
+ */
+typedef struct xfs_log_iovec {
+       xfs_caddr_t             i_addr;         /* beginning address of region */
+       int             i_len;          /* length in bytes of region */
+} xfs_log_iovec_t;
+
+typedef void* xfs_log_ticket_t;
+
+/*
+ * Structure used to pass callback function and the function's argument
+ * to the log manager.  Entries chain through cb_next.
+ */
+typedef struct xfs_log_callback {
+       struct xfs_log_callback *cb_next;       /* next entry in the list */
+       void                    (*cb_func)(void *, int);        /* callback; presumably called with cb_arg first -- confirm */
+       void                    *cb_arg;        /* the function's argument */
+} xfs_log_callback_t;
+
+
+#ifdef __KERNEL__
+/*
+ * Log manager interfaces.
+ *
+ * Only pointers to struct xfs_mount and struct xfs_buf appear below, so both
+ * are forward-declared here.  Without the declaration a struct tag first seen
+ * inside a parameter list would be scoped to that one prototype.
+ */
+struct xfs_buf;
+struct xfs_mount;
+xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
+                      xfs_log_ticket_t ticket,
+                      uint             flags);
+int      xfs_log_force(struct xfs_mount *mp,
+                       xfs_lsn_t        lsn,
+                       uint             flags);
+int      xfs_log_init(void);
+int      xfs_log_mount(struct xfs_mount *mp,
+                       dev_t            log_dev,
+                       xfs_daddr_t              start_block,
+                       int              num_bblocks);
+int      xfs_log_mount_finish(struct xfs_mount *mp, int);
+void     xfs_log_move_tail(struct xfs_mount    *mp,
+                           xfs_lsn_t           tail_lsn);
+void     xfs_log_notify(struct xfs_mount       *mp,
+                        xfs_lsn_t              lsn,
+                        xfs_log_callback_t     *callback_entry);
+int      xfs_log_reserve(struct xfs_mount *mp,
+                         int              length,
+                         int              count,
+                         xfs_log_ticket_t *ticket,
+                         char             clientid,
+                         uint             flags);
+int      xfs_log_write(struct xfs_mount *mp,
+                       xfs_log_iovec_t  region[],
+                       int              nentries,
+                       xfs_log_ticket_t ticket,
+                       xfs_lsn_t        *start_lsn);
+int      xfs_log_unmount(struct xfs_mount *mp);
+int      xfs_log_unmount_write(struct xfs_mount *mp);
+void     xfs_log_unmount_dealloc(struct xfs_mount *mp);
+int      xfs_log_force_umount(struct xfs_mount *mp, int logerror);
+int      xfs_log_need_covered(struct xfs_mount *mp);
+
+void     xlog_iodone(struct xfs_buf *);
+
+#endif
+
+
+extern int xlog_debug;         /* set to 1 to enable real log */
+
+
+#endif /* __XFS_LOG_H__ */
diff --git a/include/xfs_log_priv.h b/include/xfs_log_priv.h
new file mode 100644 (file)
index 0000000..c4f8b11
--- /dev/null
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_LOG_PRIV_H__
+#define __XFS_LOG_PRIV_H__
+
+/*
+ * XFS_LOG_TRACE is switched on by XFS_ALL_TRACE (or by defining it before
+ * this header), but it is always removed again unless DEBUG is defined.
+ */
+#if defined(XFS_ALL_TRACE)
+#define        XFS_LOG_TRACE
+#endif
+
+#if !defined(DEBUG)
+#undef XFS_LOG_TRACE
+#endif
+
+struct xfs_buf;
+struct ktrace;
+struct log;
+struct xfs_buf_cancel;
+struct xfs_mount;
+
+/*
+ * Macros, structures, prototypes for internal log manager use.
+ */
+
+#define XLOG_NUM_ICLOGS                2
+#define XLOG_MAX_ICLOGS                4
+#define XLOG_CALLBACK_SIZE     10
+#define XLOG_HEADER_MAGIC_NUM  0xFEEDbabe      /* Illegal cycle number */
+#define XLOG_RECORD_BSIZE      (16*1024)       /* eventually 32k */
+#define XLOG_MAX_RECORD_BSIZE  (32*1024)
+#define XLOG_RECORD_BSHIFT     14              /* 16384 == 1 << 14 */
+#define XLOG_MAX_RECORD_BSHIFT 15              /* 32k == 1 << 15 */
+/*
+ * Convert a byte count b into a count of XLOG_RECORD_BSIZE-byte units,
+ * rounding up (adds XLOG_RECORD_BSIZE-1, then shifts right by
+ * XLOG_RECORD_BSHIFT).
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_BTOLRBB)
+int xlog_btolrbb(int b);
+#define XLOG_BTOLRBB(b)                xlog_btolrbb(b)
+#else
+#define XLOG_BTOLRBB(b)                (((b)+XLOG_RECORD_BSIZE-1) >> XLOG_RECORD_BSHIFT)
+#endif
+
+#define XLOG_HEADER_SIZE       512
+
+/*
+ *  set lsns
+ *
+ * Counterparts of CYCLE_LSN()/BLOCK_LSN(): store the cycle and/or block word
+ * of lsn through INT_SET() with the given arch.  lsn must be an lvalue (its
+ * address is taken).  ASSIGN_LSN takes both words from (log)->l_curr_cycle
+ * and (log)->l_curr_block.
+ *
+ * Usage caveats: ASSIGN_LSN_CYCLE, ASSIGN_LSN_BLOCK and ASSIGN_LSN already
+ * end in ';', and ASSIGN_ANY_LSN expands to a bare { } block, so none of
+ * them can be used as an expression or as the unbraced body of an if that
+ * has an else.
+ */
+
+#define ASSIGN_LSN_CYCLE(lsn,cycle,arch) \
+    INT_SET(((uint *)&(lsn))[LSN_FIELD_CYCLE(arch)], arch, (cycle));
+#define ASSIGN_LSN_BLOCK(lsn,block,arch) \
+    INT_SET(((uint *)&(lsn))[LSN_FIELD_BLOCK(arch)], arch, (block));
+#define ASSIGN_ANY_LSN(lsn,cycle,block,arch)  \
+    { \
+        ASSIGN_LSN_CYCLE(lsn,cycle,arch); \
+        ASSIGN_LSN_BLOCK(lsn,block,arch); \
+    }
+#define ASSIGN_LSN(lsn,log,arch) \
+    ASSIGN_ANY_LSN(lsn,(log)->l_curr_cycle,(log)->l_curr_block,arch);
+    
+/* True when every bit set in b is also set in f. */
+#define XLOG_SET(f,b)          (((f) & (b)) == (b))
+
+/*
+ * Read a cycle number from ptr, treated as an array of uints: normally the
+ * first word, but if that word equals XLOG_HEADER_MAGIC_NUM the second word
+ * is returned instead.  ptr appears more than once in the expansion.
+ */
+#define GET_CYCLE(ptr, arch) \
+    (INT_GET(*(uint *)(ptr), arch) == XLOG_HEADER_MAGIC_NUM ? \
+         INT_GET(*((uint *)(ptr)+1), arch) : \
+         INT_GET(*(uint *)(ptr), arch) \
+    )
+    
+/*
+ * Sum of the two block numbers shifted right by one (their average).  Both
+ * arguments are parenthesized so that expression arguments such as
+ * BLK_AVG(a, b << 1) group as written by the caller.
+ */
+#define BLK_AVG(blk1, blk2)    (((blk1)+(blk2)) >> 1)
+
+
+#ifdef __KERNEL__
+/*
+ * get client id from packed copy.
+ *
+ * this hack is here because the xlog_pack code copies four bytes
+ * of xlog_op_header containing the fields oh_clientid, oh_flags
+ * and oh_res2 into the packed copy.
+ *
+ * later on this four byte chunk is treated as an int and the
+ * client id is pulled out.
+ *
+ * this has endian issues, of course: oh_clientid is the first byte of the
+ * chunk, i.e. the low byte of the int on a little-endian host and the high
+ * byte otherwise.  Both variants mask the result down to one byte, so a
+ * signed i with its top bit set (e.g. client id 0xaa) cannot carry
+ * sign-extension bits out of the right shift.  The arch argument is unused.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define GET_CLIENT_ID(i,arch) \
+    ((i) & 0xff)
+#else
+#define GET_CLIENT_ID(i,arch) \
+    (((i) >> 24) & 0xff)
+#endif
+   
+/*
+ * Subtract bytes from the write grant counters when type is 'w', otherwise
+ * from the reserve grant counters.  If the byte count drops below zero it is
+ * wrapped by adding l_logsize and the matching cycle counter is decremented.
+ * type is parenthesized so that an expression argument (e.g. a ?: choice)
+ * is compared against 'w' as a whole.  The inline form expands to a bare
+ * { } block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_SUB_SPACE)
+void xlog_grant_sub_space(struct log *log, int bytes, int type);
+#define XLOG_GRANT_SUB_SPACE(log,bytes,type)   \
+       xlog_grant_sub_space(log,bytes,type)
+#else
+#define XLOG_GRANT_SUB_SPACE(log,bytes,type)                           \
+    {                                                                  \
+       if ((type) == 'w') {                                            \
+               (log)->l_grant_write_bytes -= (bytes);                  \
+               if ((log)->l_grant_write_bytes < 0) {                   \
+                       (log)->l_grant_write_bytes += (log)->l_logsize; \
+                       (log)->l_grant_write_cycle--;                   \
+               }                                                       \
+       } else {                                                        \
+               (log)->l_grant_reserve_bytes -= (bytes);                \
+               if ((log)->l_grant_reserve_bytes < 0) {                 \
+                       (log)->l_grant_reserve_bytes += (log)->l_logsize;\
+                       (log)->l_grant_reserve_cycle--;                 \
+               }                                                       \
+        }                                                              \
+    }
+#endif
+/*
+ * Add bytes to the write grant counters when type is 'w', otherwise to the
+ * reserve grant counters.  If the byte count grows past l_logsize it is
+ * wrapped by subtracting l_logsize and the matching cycle counter is
+ * incremented.  type is parenthesized so that an expression argument (e.g.
+ * a ?: choice) is compared against 'w' as a whole.  The inline form expands
+ * to a bare { } block.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_ADD_SPACE)
+void xlog_grant_add_space(struct log *log, int bytes, int type);
+#define XLOG_GRANT_ADD_SPACE(log,bytes,type)   \
+       xlog_grant_add_space(log,bytes,type)
+#else
+#define XLOG_GRANT_ADD_SPACE(log,bytes,type)                           \
+    {                                                                  \
+       if ((type) == 'w') {                                            \
+               (log)->l_grant_write_bytes += (bytes);                  \
+               if ((log)->l_grant_write_bytes > (log)->l_logsize) {    \
+                       (log)->l_grant_write_bytes -= (log)->l_logsize; \
+                       (log)->l_grant_write_cycle++;                   \
+               }                                                       \
+       } else {                                                        \
+               (log)->l_grant_reserve_bytes += (bytes);                \
+               if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {  \
+                       (log)->l_grant_reserve_bytes -= (log)->l_logsize;\
+                       (log)->l_grant_reserve_cycle++;                 \
+               }                                                       \
+        }                                                              \
+    }
+#endif
+/*
+ * Insert ticket tic into the circular, doubly linked queue whose head
+ * pointer is q.  With a non-empty queue tic is linked in just before the
+ * head (between q->t_prev and q) and q is left unchanged; with an empty
+ * queue tic becomes a one-element ring and q is set to it, so q must be an
+ * lvalue.  tic is then flagged XLOG_TIC_IN_Q.  Expands to a bare { } block.
+ */
+#define XLOG_INS_TICKETQ(q,tic)                                \
+    {                                                  \
+       if (q) {                                        \
+               (tic)->t_next       = (q);              \
+               (tic)->t_prev       = (q)->t_prev;      \
+               (q)->t_prev->t_next = (tic);            \
+               (q)->t_prev         = (tic);            \
+       } else {                                        \
+               (tic)->t_prev = (tic)->t_next = (tic);  \
+               (q) = (tic);                            \
+       }                                               \
+       (tic)->t_flags |= XLOG_TIC_IN_Q;                \
+    }
+/*
+ * Unlink ticket tic from the queue headed by q.  If tic is the only element
+ * (it is its own t_next) q becomes NULL; otherwise q is set to tic's
+ * successor -- whether or not tic was the head -- and tic's neighbours are
+ * joined to each other.  Finally tic's links are cleared and XLOG_TIC_IN_Q
+ * is dropped from its flags.  q must be an lvalue.  Expands to a bare { }
+ * block.
+ */
+#define XLOG_DEL_TICKETQ(q,tic)                                \
+    {                                                  \
+       if ((tic) == (tic)->t_next) {                   \
+               (q) = NULL;                             \
+       } else {                                        \
+               (q) = (tic)->t_next;                    \
+               (tic)->t_next->t_prev = (tic)->t_prev;  \
+               (tic)->t_prev->t_next = (tic)->t_next;  \
+       }                                               \
+       (tic)->t_next = (tic)->t_prev = NULL;           \
+       (tic)->t_flags &= ~XLOG_TIC_IN_Q;               \
+    }
+
+
+/*
+ * Shorthands for taking and releasing the two log locks, l_grant_lock and
+ * l_icloglock.  The *_UNLOCK forms hand their second argument s to
+ * mutex_spinunlock(); presumably s is the value returned by the matching
+ * *_LOCK call -- confirm against mutex_spinlock().
+ */
+#define GRANT_LOCK(log)                mutex_spinlock(&(log)->l_grant_lock)
+#define GRANT_UNLOCK(log, s)   mutex_spinunlock(&(log)->l_grant_lock, s)
+#define LOG_LOCK(log)          mutex_spinlock(&(log)->l_icloglock)
+#define LOG_UNLOCK(log, s)     mutex_spinunlock(&(log)->l_icloglock, s)
+
+/*
+ * Message helpers built on cmn_err().  xlog_panic and xlog_exit are
+ * identical (both use CE_PANIC); xlog_warn uses CE_WARN.  s is passed
+ * straight through as cmn_err()'s second argument -- NOTE(review): if that
+ * is a printf-style format, callers must not pass text containing '%';
+ * confirm.  Each expands to a bare { } block.
+ */
+#define xlog_panic(s)          {cmn_err(CE_PANIC, s); }
+#define xlog_exit(s)           {cmn_err(CE_PANIC, s); }
+#define xlog_warn(s)           {cmn_err(CE_WARN, s); }
+
+/*
+ * In core log state
+ */
+#define XLOG_STATE_ACTIVE    0x0001 /* Current IC log being written to */
+#define XLOG_STATE_WANT_SYNC 0x0002 /* Want to sync this iclog; no more writes */
+#define XLOG_STATE_SYNCING   0x0004 /* This IC log is syncing */
+#define XLOG_STATE_DONE_SYNC 0x0008 /* Done syncing to disk */
+#define XLOG_STATE_DO_CALLBACK \
+                            0x0010 /* Process callback functions */
+#define XLOG_STATE_CALLBACK  0x0020 /* Callback functions now */
+#define XLOG_STATE_DIRTY     0x0040 /* Dirty IC log, not ready for ACTIVE status*/
+#define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
+#define XLOG_STATE_ALL      0x7FFF /* All possible valid flags */
+#define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
+#endif /* __KERNEL__ */
+
+/*
+ * Flags to log operation header
+ *
+ * The first write of a new transaction will be preceded with a start
+ * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
+ * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
+ * the remainder of the current active in-core log, it is split up into
+ * multiple regions.  Each partial region will be marked with a
+ * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
+ *
+ */
+#define XLOG_START_TRANS       0x01    /* Start a new transaction */
+#define XLOG_COMMIT_TRANS      0x02    /* Commit this transaction */
+#define XLOG_CONTINUE_TRANS    0x04    /* Cont this trans into new region */
+#define XLOG_WAS_CONT_TRANS    0x08    /* presumably: continued from a prev region (TODO confirm) */
+#define XLOG_END_TRANS         0x10    /* End a continued transaction */
+#define XLOG_UNMOUNT_TRANS     0x20    /* Unmount a filesystem transaction */
+/* Every flag above except XLOG_START_TRANS */
+#define XLOG_SKIP_TRANS                (XLOG_COMMIT_TRANS | XLOG_CONTINUE_TRANS | \
+                                XLOG_WAS_CONT_TRANS | XLOG_END_TRANS | \
+                                XLOG_UNMOUNT_TRANS)
+
+#ifdef __KERNEL__
+/*
+ * Flags to log ticket
+ */
+#define XLOG_TIC_INITED                0x1     /* has been initialized */
+#define XLOG_TIC_PERM_RESERV   0x2     /* permanent reservation */
+#define XLOG_TIC_IN_Q          0x4
+#endif /* __KERNEL__ */
+
+#define XLOG_UNMOUNT_TYPE      0x556e  /* Un for Unmount */
+
+/*
+ * Flags for log structure
+ */
+#define XLOG_CHKSUM_MISMATCH   0x1     /* used only during recovery */
+#define XLOG_ACTIVE_RECOVERY   0x2     /* in the middle of recovery */
+#define        XLOG_RECOVERY_NEEDED    0x4     /* log was recovered */     
+#define XLOG_IO_ERROR          0x8     /* log hit an I/O error, and being
+                                          shutdown */
+typedef __uint32_t xlog_tid_t;
+
+
+#ifdef __KERNEL__
+/*
+ * Below are states for covering allocation transactions.
+ * By covering, we mean changing the h_tail_lsn in the last on-disk
+ * log write such that no allocation transactions will be re-done during
+ * recovery after a system crash. Recovery starts at the last on-disk
+ * log write.
+ *
+ * These states are used to insert dummy log entries to cover
+ * space allocation transactions which can undo non-transactional changes
+ * after a crash. Writes to a file with space
+ * already allocated do not result in any transactions. Allocations
+ * might include space beyond the EOF. So if we just push the EOF a
+ * little, the last transaction for the file could contain the wrong
+ * size. If there is no file system activity, after an allocation
+ * transaction, and the system crashes, the allocation transaction
+ * will get replayed and the file will be truncated. This could
+ * be hours/days/... after the allocation occurred.
+ *
+ * The fix for this is to do two dummy transactions when the
+ * system is idle. We need two dummy transaction because the h_tail_lsn
+ * in the log record header needs to point beyond the last possible
+ * non-dummy transaction. The first dummy changes the h_tail_lsn to
+ * the first transaction before the dummy. The second dummy causes
+ * h_tail_lsn to point to the first dummy. Recovery starts at h_tail_lsn.
+ * 
+ * These dummy transactions get committed when everything
+ * is idle (after there has been some activity).
+ *
+ * There are 5 states used to control this.
+ *
+ *  IDLE -- no logging has been done on the file system or
+ *             we are done covering previous transactions.
+ *  NEED -- logging has occurred and we need a dummy transaction
+ *             when the log becomes idle.
+ *  DONE -- we were in the NEED state and have committed a dummy
+ *             transaction.
+ *  NEED2 -- we detected that a dummy transaction has gone to the
+ *             on disk log with no other transactions.
+ *  DONE2 -- we committed a dummy transaction when in the NEED2 state.
+ *
+ * There are two places where we switch states:
+ *
+ * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2.
+ *     We commit the dummy transaction and switch to DONE or DONE2,
+ *     respectively. In all other states, we don't do anything.
+ *
+ * 2.) When we finish writing the on-disk log (xlog_state_clean_log).
+ *
+ *     No matter what state we are in, if this isn't the dummy
+ *     transaction going out, the next state is NEED.
+ *     So, if we aren't in the DONE or DONE2 states, the next state
+ *     is NEED. We can't be finishing a write of the dummy record
+ *     unless it was committed and the state switched to DONE or DONE2.
+ *     
+ *     If we are in the DONE state and this was a write of the
+ *             dummy transaction, we move to NEED2.
+ *
+ *     If we are in the DONE2 state and this was a write of the
+ *             dummy transaction, we move to IDLE.
+ *
+ *
+ * Writing only one dummy transaction can get appended to
+ * one file space allocation. When this happens, the log recovery
+ * code replays the space allocation and a file could be truncated.
+ * This is why we have the NEED2 and DONE2 states before going idle.
+ */
+
+#define XLOG_STATE_COVER_IDLE  0
+#define XLOG_STATE_COVER_NEED  1
+#define XLOG_STATE_COVER_DONE  2
+#define XLOG_STATE_COVER_NEED2 3
+#define XLOG_STATE_COVER_DONE2 4
+
+#define XLOG_COVER_OPS         5
+
+/*
+ * Log ticket.  t_next/t_prev link the ticket into a queue (see
+ * XLOG_INS_TICKETQ / XLOG_DEL_TICKETQ) and t_flags holds XLOG_TIC_* bits.
+ * The ":N" at the end of each field comment is the author's note of the
+ * field size in bytes.
+ */
+typedef struct xlog_ticket {
+       sv_t               t_sema;       /* sleep on this semaphore      :20 */
+       struct xlog_ticket *t_next;      /*                              : 4 */
+       struct xlog_ticket *t_prev;      /*                              : 4 */
+       xlog_tid_t         t_tid;        /* transaction identifier       : 4 */
+       int                t_curr_res;   /* current reservation in bytes : 4 */
+       int                t_unit_res;   /* unit reservation in bytes    : 4 */
+       char               t_ocnt;       /* original count               : 1 */
+       char               t_cnt;        /* current count                : 1 */
+       char               t_clientid;   /* who does this belong to;     : 1 */
+       char               t_flags;      /* properties of reservation    : 1 */
+} xlog_ticket_t;
+#endif
+
+
+/*
+ * Log operation header.  oh_flags takes the XLOG_*_TRANS flag values.  The
+ * last three fields (oh_clientid, oh_flags, oh_res2) together occupy four
+ * bytes.
+ */
+typedef struct xlog_op_header {
+       xlog_tid_t oh_tid;      /* transaction id of operation  :  4 b */
+       int        oh_len;      /* bytes in data region         :  4 b */
+       char       oh_clientid; /* who sent me this             :  1 b */
+       char       oh_flags;    /*                              :  1 b */
+       ushort     oh_res2;     /* 32 bit align                 :  2 b */
+} xlog_op_header_t;
+
+
+/* valid values for h_fmt */
+#define XLOG_FMT_UNKNOWN  0
+#define XLOG_FMT_LINUX_LE 1
+#define XLOG_FMT_LINUX_BE 2
+#define XLOG_FMT_IRIX_BE  3
+
+/*
+ * our fmt, chosen from the host byte order.
+ *
+ * Note: if neither __BYTE_ORDER nor __LITTLE_ENDIAN is defined, the first
+ * test compares 0 with 0 and silently selects XLOG_FMT_LINUX_LE; the #error
+ * only fires when __BYTE_ORDER matches neither constant.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define XLOG_FMT XLOG_FMT_LINUX_LE
+#else
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define XLOG_FMT XLOG_FMT_LINUX_BE
+#else
+#error unknown byte order
+#endif
+#endif
+
+/*
+ * Log record (LR) header.  h_cycle_data[] holds one uint for each
+ * BBSIZE-byte block of a maximum-sized (XLOG_MAX_RECORD_BSIZE) record.
+ * h_fmt takes one of the XLOG_FMT_* values.  The ":N" at the end of each
+ * field comment is the author's note of the field size in bytes.
+ */
+typedef struct xlog_rec_header {
+       uint      h_magicno;    /* log record (LR) identifier           :  4 */
+       uint      h_cycle;      /* write cycle of log                   :  4 */
+       int       h_version;    /* LR version                           :  4 */
+       int       h_len;        /* len in bytes; should be 64-bit aligned: 4 */
+       xfs_lsn_t h_lsn;        /* lsn of this LR                       :  8 */
+       xfs_lsn_t h_tail_lsn;   /* lsn of 1st LR w/ buffers not committed: 8 */
+       uint      h_chksum;     /* may not be used; non-zero if used    :  4 */
+       int       h_prev_block; /* block number to previous LR          :  4 */
+       int       h_num_logops; /* number of log operations in this LR  :  4 */
+       uint      h_cycle_data[XLOG_MAX_RECORD_BSIZE / BBSIZE];
+        /* new fields */
+        int       h_fmt;        /* format of log record                 :  4 */
+        uuid_t    h_fs_uuid;    /* uuid of FS                           : 16 */
+} xlog_rec_header_t;
+
+#ifdef __KERNEL__
+/*
+ * Bookkeeping fields of an in-core log (iclog).
+ *
+ * - A log record header is 512 bytes.  There is plenty of room to grow the
+ *     xlog_rec_header_t into the reserved space.
+ * - ic_data follows, so a write to disk can start at the beginning of
+ *     the iclog.
+ * - ic_forcesema is used to implement synchronous forcing of the iclog to disk.
+ * - ic_next is the pointer to the next iclog in the ring.
+ * - ic_prev is presumably the pointer to the previous iclog in the ring
+ *     (TODO confirm).
+ * - ic_bp is a pointer to the buffer used to write this incore log to disk.
+ * - ic_log is a pointer back to the global log structure.
+ * - ic_callback is a linked list of callback function/argument pairs to be
+ *     called after an iclog finishes writing.
+ * - ic_trace exists only when DEBUG is defined.
+ * - ic_size is the full size of the header plus data.
+ * - ic_offset is the current number of bytes written to in this iclog.
+ * - ic_refcnt is bumped when someone is writing to the log.
+ * - ic_state is the state of the iclog.
+ */
+typedef struct xlog_iclog_fields {
+       sv_t                    ic_forcesema;
+       struct xlog_in_core     *ic_next;
+       struct xlog_in_core     *ic_prev;
+       struct xfs_buf                  *ic_bp;
+       struct log              *ic_log;
+       xfs_log_callback_t      *ic_callback;
+       xfs_log_callback_t      **ic_callback_tail;
+#ifdef DEBUG
+       struct ktrace           *ic_trace;
+#endif
+       int                     ic_size;
+       int                     ic_offset;
+       int                     ic_refcnt;
+       int                     ic_roundoff;
+       int                     ic_bwritecnt;
+       ushort_t                ic_state;
+} xlog_iclog_fields_t;
+
+/*
+ * In-core log buffer (iclog).  ic_h1 overlays the bookkeeping fields with a
+ * BBSIZE-byte pad; ic_h2 overlays the log record header with an
+ * XLOG_HEADER_SIZE-byte pad; ic_data marks the start of the record data.
+ * ic_data is declared with a single element -- presumably the structure is
+ * over-allocated to provide the real data area; confirm at the allocation
+ * site.
+ */
+typedef struct xlog_in_core {
+       union {
+               xlog_iclog_fields_t     hic_fields;
+               char                    hic_pad[BBSIZE];
+       } ic_h1;
+       union {
+               xlog_rec_header_t hic_header;
+               char              hic_sector[XLOG_HEADER_SIZE];
+       } ic_h2;
+       char                   ic_data[1];
+} xlog_in_core_t;
+
+/*
+ * Defines to save our code from this glop: each name below expands to the
+ * full path of the corresponding member inside xlog_in_core_t, so code can
+ * write iclog->ic_next instead of iclog->ic_h1.hic_fields.ic_next.
+ *
+ * These are plain object-like macros, so they rewrite every later use of
+ * these identifiers in any file that includes this header.  ic_trace is
+ * defined unconditionally although the member only exists when DEBUG is
+ * defined.
+ */
+#define        ic_forcesema    ic_h1.hic_fields.ic_forcesema
+#define        ic_next         ic_h1.hic_fields.ic_next
+#define        ic_prev         ic_h1.hic_fields.ic_prev
+#define        ic_bp           ic_h1.hic_fields.ic_bp
+#define        ic_log          ic_h1.hic_fields.ic_log
+#define        ic_callback     ic_h1.hic_fields.ic_callback
+#define        ic_callback_tail ic_h1.hic_fields.ic_callback_tail
+#define        ic_trace        ic_h1.hic_fields.ic_trace
+#define        ic_size         ic_h1.hic_fields.ic_size
+#define        ic_offset       ic_h1.hic_fields.ic_offset
+#define        ic_refcnt       ic_h1.hic_fields.ic_refcnt
+#define        ic_roundoff     ic_h1.hic_fields.ic_roundoff
+#define        ic_bwritecnt    ic_h1.hic_fields.ic_bwritecnt
+#define        ic_state        ic_h1.hic_fields.ic_state
+#define ic_header      ic_h2.hic_header
+
+/*
+ * The in-core log structure (xlog_t).
+ *
+ * The reservation head lsn is not made up of a cycle number and block number.
+ * Instead, it uses a cycle number and byte number.  Logs don't expect to
+ * overflow 31 bits worth of byte offset, so using a byte number will mean
+ * that round off problems won't occur when releasing partial reservations.
+ */
+typedef struct log {
+    /* The following block of fields are changed while holding icloglock */
+    sema_t             l_flushsema;    /* iclog flushing semaphore */
+    int                        l_flushcnt;     /* # of procs waiting on this sema */
+    int                        l_ticket_cnt;   /* free ticket count */
+    int                        l_ticket_tcnt;  /* total ticket count */
+    int                        l_covered_state;/* state of "covering disk log entries" */
+    xlog_ticket_t      *l_freelist;    /* free list of tickets */
+    xlog_ticket_t      *l_unmount_free;/* kmem_free these addresses */
+    xlog_ticket_t      *l_tail;        /* presumably tail of the ticket free list - TODO confirm */
+    xlog_in_core_t     *l_iclog;       /* head log queue       */
+    lock_t             l_icloglock;    /* grab to change iclog state */
+    xfs_lsn_t          l_tail_lsn;     /* lsn of 1st LR w/ unflush buffers */
+    xfs_lsn_t          l_last_sync_lsn;/* lsn of last LR on disk */
+    struct xfs_mount   *l_mp;          /* mount point */
+    struct xfs_buf     *l_xbuf;        /* extra buffer for log wrapping */
+    dev_t              l_dev;          /* dev_t of log */
+    xfs_daddr_t                l_logBBstart;   /* start block of log */
+    int                        l_logsize;      /* size of log in bytes */
+    int                        l_logBBsize;    /* size of log in 512 byte chunks */
+    int                        l_roundoff;     /* round off error of all iclogs */
+    int                        l_curr_cycle;   /* Cycle number of log writes */
+    int                        l_prev_cycle;   /* Cycle # b4 last block increment */
+    int                        l_curr_block;   /* current logical block of log */
+    int                        l_prev_block;   /* previous logical block of log */
+    int                        l_iclog_size;    /* presumably size of one iclog in bytes - TODO confirm */
+    int                        l_iclog_size_log;/* presumably log2 of l_iclog_size - TODO confirm */
+    int                        l_iclog_bufs;    /* number of iclog buffers */
+
+    /* The following fields are used for debugging; need to hold icloglock */
+    char               *l_iclog_bak[XLOG_MAX_ICLOGS];
+
+    /* The following block of fields are changed while holding grant_lock */
+    lock_t             l_grant_lock;           /* protects below fields */
+    xlog_ticket_t      *l_reserve_headq;       /* ticket queue; presumably reserve-space waiters - TODO confirm */
+    xlog_ticket_t      *l_write_headq;         /* ticket queue; presumably write-space waiters - TODO confirm */
+    int                        l_grant_reserve_cycle;  /* cycle count; see XLOG_GRANT_{ADD,SUB}_SPACE */
+    int                        l_grant_reserve_bytes;  /* byte count, wrapped at l_logsize */
+    int                        l_grant_write_cycle;    /* cycle count; see XLOG_GRANT_{ADD,SUB}_SPACE */
+    int                        l_grant_write_bytes;    /* byte count, wrapped at l_logsize */
+
+    /* The following fields don't need locking */
+#ifdef DEBUG
+    struct ktrace      *l_trace;
+    struct ktrace      *l_grant_trace;
+#endif
+    uint               l_flags;
+    uint               l_quotaoffs_flag;/* XFS_DQ_*, if QUOTAOFFs found */
+    struct xfs_buf_cancel **l_buf_cancel_table;        
+} xlog_t;
+
+
+/*
+ * common routines
+ *
+ * Note: xlog_get_bp() is declared with the xfs_mount_t typedef, which this
+ * header does not declare (only struct xfs_mount is forward-declared), so
+ * the header providing that typedef must be included first.
+ */
+extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp,
+                                     xlog_in_core_t *iclog);
+extern int      xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk);
+extern int      xlog_find_tail(xlog_t  *log,
+                               xfs_daddr_t *head_blk,
+                               xfs_daddr_t *tail_blk,
+                               int readonly);
+extern int      xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
+extern int      xlog_recover(xlog_t *log, int readonly);
+extern int      xlog_recover_finish(xlog_t *log, int mfsi_flags);
+extern void     xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog);
+extern struct xfs_buf *xlog_get_bp(int,xfs_mount_t *);
+extern void     xlog_put_bp(struct xfs_buf *);
+extern int      xlog_bread(xlog_t *, xfs_daddr_t blkno, int bblks, struct xfs_buf *bp);
+extern void     xlog_recover_process_iunlinks(xlog_t *log);
+
+#define XLOG_TRACE_GRAB_FLUSH  1
+#define XLOG_TRACE_REL_FLUSH   2
+#define XLOG_TRACE_SLEEP_FLUSH 3
+#define XLOG_TRACE_WAKE_FLUSH  4
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/include/xfs_log_recover.h b/include/xfs_log_recover.h
new file mode 100644 (file)
index 0000000..233cb16
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_LOG_RECOVER_H__
+#define __XFS_LOG_RECOVER_H__
+
+/*
+ * Macros, structures, prototypes for internal log manager use.
+ */
+
+#define XLOG_RHASH_BITS  4                     /* log2 of XLOG_RHASH_SIZE */
+#define XLOG_RHASH_SIZE        16                      /* number of hash buckets, 1 << XLOG_RHASH_BITS */
+#define XLOG_RHASH_SHIFT 2                     /* low tid bits discarded before masking; tid is parenthesized below so an expression argument is cast as a whole */
+#define XLOG_RHASH(tid)        \
+       ((((__uint32_t)(tid)) >> XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE - 1))
+
+#define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1) /* NOTE(review): the rationale for "/ 2 + 1" is not visible here */
+
+
+/*
+ * A recovered log item (linked via ri_next/ri_prev).  Item headers are in ri_buf[0]; additional buffers follow.
+ */
+typedef struct xlog_recover_item {
+       struct xlog_recover_item *ri_next;
+       struct xlog_recover_item *ri_prev;
+       int                      ri_type;       /* NOTE(review): value set not visible in this header */
+       int                      ri_cnt;        /* count of regions found */
+       int                      ri_total;      /* total regions */
+       xfs_log_iovec_t          *ri_buf;       /* ptr to regions buffer */
+} xlog_recover_item_t;
+
+struct xlog_tid;       /* NOTE(review): the struct below uses the xlog_tid_t typedef, not this tag */
+typedef struct xlog_recover {          /* per-transaction recovery record, singly linked via r_next */
+       struct xlog_recover *r_next;
+       xlog_tid_t          r_log_tid;          /* log's transaction id */
+       xfs_trans_header_t  r_theader;          /* trans header for partial */
+       int                 r_state;            /* not needed */
+       xfs_lsn_t           r_lsn;              /* xact lsn */
+       xlog_recover_item_t *r_itemq;           /* q for items */
+} xlog_recover_t;
+
+#define ITEM_TYPE(i)   (*(ushort *)(i)->ri_buf[0].i_addr)      /* reads the ushort stored at the start of ri_buf[0].i_addr */
+
+/*
+ * This is the number of entries in the l_buf_cancel_table used during
+ * recovery.
+ */
+#define        XLOG_BC_TABLE_SIZE      64
+
+#define        XLOG_RECOVER_PASS1      1       /* NOTE(review): presumably identifies the first recovery pass - confirm */
+#define        XLOG_RECOVER_PASS2      2       /* NOTE(review): presumably identifies the second recovery pass - confirm */
+
+#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/include/xfs_mount.h b/include/xfs_mount.h
new file mode 100644 (file)
index 0000000..b026f20
--- /dev/null
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_MOUNT_H__
+#define        __XFS_MOUNT_H__
+
+
+typedef struct xfs_trans_reservations {        /* one uint per transaction type; stored in xfs_mount as m_reservations ("precomputed res values") */
+       uint    tr_write;       /* extent alloc trans */
+       uint    tr_itruncate;   /* truncate trans */
+       uint    tr_rename;      /* rename trans */
+       uint    tr_link;        /* link trans */
+       uint    tr_remove;      /* unlink trans */
+       uint    tr_symlink;     /* symlink trans */
+       uint    tr_create;      /* create trans */
+       uint    tr_mkdir;       /* mkdir trans */
+       uint    tr_ifree;       /* inode free trans */
+       uint    tr_ichange;     /* inode update trans */
+       uint    tr_growdata;    /* fs data section grow trans */
+       uint    tr_swrite;      /* sync write inode trans */
+       uint    tr_addafork;    /* cvt inode to attributed trans */
+       uint    tr_writeid;     /* write setuid/setgid file */
+       uint    tr_attrinval;   /* attr fork buffer invalidation */
+       uint    tr_attrset;     /* set/create an attribute */
+       uint    tr_attrrm;      /* remove an attribute */
+       uint    tr_clearagi;    /* clear bad agi unlinked ino bucket */
+       uint    tr_growrtalloc; /* grow realtime allocations */
+       uint    tr_growrtzero;  /* grow realtime zeroing */
+       uint    tr_growrtfree;  /* grow realtime freeing */
+} xfs_trans_reservations_t;
+
+
+#ifndef __KERNEL__
+/*
+ * Moved here from xfs_ag.h to avoid reordering header files.  Macro forms for non-__KERNEL__ builds: d is converted with XFS_BB_TO_FSBT, then divided by (AGNO) or reduced modulo (AGBNO) sb_agblocks.
+ */
+#define XFS_DADDR_TO_AGNO(mp,d) \
+       ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
+#define XFS_DADDR_TO_AGBNO(mp,d) \
+       ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
+#else
+struct cred;           /* forward declarations: this header only uses pointers to these types */
+struct mounta;
+struct vfs;
+struct vnode;
+struct xfs_args;
+struct xfs_ihash;
+struct xfs_chash;
+struct xfs_inode;
+struct xfs_perag;
+struct xfs_quotainfo;
+struct xfs_iocore;
+struct xfs_dio;
+struct xfs_bmbt_irec;
+struct xfs_bmap_free;
+
+#if defined(INTERRUPT_LATENCY_TESTING)        /* AIL lock is a mutex_t; the s argument is unused */
+#define        SPLDECL(s)             
+#define        AIL_LOCK_T              mutex_t
+#define        AIL_LOCKINIT(x,y)       mutex_init(x,MUTEX_DEFAULT, y)
+#define        AIL_LOCK_DESTROY(x)     mutex_destroy(x)
+#define        AIL_LOCK(mp,s)          mutex_lock(&(mp)->m_ail_lock, PZERO)
+#define        AIL_UNLOCK(mp,s)        mutex_unlock(&(mp)->m_ail_lock)
+#else  /* !INTERRUPT_LATENCY_TESTING: AIL lock is a lock_t; s carries the mutex_spinlock() result to the unlock */
+#define        SPLDECL(s)              int s
+#define        AIL_LOCK_T              lock_t
+#define        AIL_LOCKINIT(x,y)       spinlock_init(x,y)
+#define        AIL_LOCK_DESTROY(x)     spinlock_destroy(x)
+#define        AIL_LOCK(mp,s)          ((s) = mutex_spinlock(&(mp)->m_ail_lock))       /* s and the whole expansion are parenthesized */
+#define        AIL_UNLOCK(mp,s)        mutex_spinunlock(&(mp)->m_ail_lock, (s))
+#endif /* !INTERRUPT_LATENCY_TESTING */
+
+
+/* Prototypes and functions for I/O core modularization, a vector
+ * of functions is used to indirect from xfs/cxfs independent code
+ * to the xfs/cxfs dependent code.
+ * The vector is placed in the mount structure so that we can
+ * minimize the number of memory indirections involved.  Where a typedef's first parameter is void *, the XFS_* wrapper macros in this header pass (io)->io_obj for it.
+ */
+
+typedef int            (*xfs_dio_write_t)(struct xfs_dio *);
+typedef int            (*xfs_dio_read_t)(struct xfs_dio *);
+typedef int            (*xfs_strat_write_t)(struct xfs_iocore *, struct xfs_buf *);
+typedef int            (*xfs_bmapi_t)(struct xfs_trans *, void *,
+                               xfs_fileoff_t, xfs_filblks_t, int,
+                               xfs_fsblock_t *, xfs_extlen_t,
+                               struct xfs_bmbt_irec *, int *,
+                               struct xfs_bmap_free *);
+typedef int            (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
+typedef int            (*xfs_rsync_t)(void *, int, xfs_off_t, xfs_off_t);
+typedef uint           (*xfs_lck_map_shared_t)(void *);
+typedef void           (*xfs_lock_t)(void *, uint);
+typedef void           (*xfs_lock_demote_t)(void *, uint);
+typedef int            (*xfs_lock_nowait_t)(void *, uint);
+typedef void           (*xfs_unlk_t)(void *, unsigned int);
+typedef void           (*xfs_chgtime_t)(void *, int);
+typedef xfs_fsize_t    (*xfs_size_t)(void *);
+typedef xfs_fsize_t    (*xfs_setsize_t)(void *, xfs_off_t);
+typedef xfs_fsize_t    (*xfs_lastbyte_t)(void *);
+
+#ifdef CELL_CAPABLE
+typedef int             (*xfs_checklock_t)(bhv_desc_t *, struct vnode *,
+                                int, off_t, off_t, int, struct cred *,
+                                struct flid *, vrwlock_t, int);
+#endif
+
+typedef struct xfs_ioops {             /* I/O operations vector; embedded in xfs_mount as m_io_ops */
+       xfs_dio_write_t         xfs_dio_write_func;
+       xfs_dio_read_t          xfs_dio_read_func;
+       xfs_strat_write_t       xfs_strat_write_func;
+       xfs_bmapi_t             xfs_bmapi_func;
+       xfs_bmap_eof_t          xfs_bmap_eof_func;
+       xfs_rsync_t             xfs_rsync_func;
+       xfs_lck_map_shared_t    xfs_lck_map_shared;
+       xfs_lock_t              xfs_ilock;
+       xfs_lock_demote_t       xfs_ilock_demote;
+       xfs_lock_nowait_t       xfs_ilock_nowait;
+       xfs_unlk_t              xfs_unlock;     /* called by both XFS_IUNLOCK and XFS_UNLK_MAP_SHARED */
+       xfs_chgtime_t           xfs_chgtime;    
+       xfs_size_t              xfs_size_func;
+       xfs_setsize_t           xfs_setsize_func;
+       xfs_lastbyte_t          xfs_lastbyte;
+#ifdef CELL_CAPABLE
+        xfs_checklock_t         xfs_checklock; /* no XFS_* wrapper macro for this entry in this header */
+#endif
+} xfs_ioops_t;
+
+
+#define XFS_DIO_WRITE(mp, diop) \
+       (*(mp)->m_io_ops.xfs_dio_write_func)(diop)      /* every macro in this group calls through mp's m_io_ops vector */
+
+#define XFS_DIO_READ(mp, diop) \
+       (*(mp)->m_io_ops.xfs_dio_read_func)(diop)
+
+#define XFS_STRAT_WRITE(mp, io, bp) \
+       (*(mp)->m_io_ops.xfs_strat_write_func)(io, bp)  /* passes io itself, not (io)->io_obj */
+
+#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist)    \
+       (*(mp)->m_io_ops.xfs_bmapi_func) \
+               (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist)
+
+#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
+       (*(mp)->m_io_ops.xfs_bmap_eof_func) \
+               ((io)->io_obj, endoff, whichfork, eof)
+
+#define XFS_RSYNC(mp, io, ioflag, start, end) \
+       (*(mp)->m_io_ops.xfs_rsync_func)((io)->io_obj, ioflag, start, end)
+
+#define XFS_LCK_MAP_SHARED(mp, io) \
+       (*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj)
+
+#define XFS_UNLK_MAP_SHARED(mp, io, mode) \
+       (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)        /* same expansion as XFS_IUNLOCK */
+
+#define XFS_ILOCK(mp, io, mode) \
+       (*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode)
+
+#define XFS_ILOCK_NOWAIT(mp, io, mode) \
+       (*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode)
+
+#define XFS_IUNLOCK(mp, io, mode) \
+       (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)        /* same expansion as XFS_UNLK_MAP_SHARED */
+
+#define XFS_ILOCK_DEMOTE(mp, io, mode) \
+       (*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode)
+
+#define XFS_CHGTIME(mp, io, flags) \
+       (*(mp)->m_io_ops.xfs_chgtime)((io)->io_obj, flags)
+
+#define XFS_SIZE(mp, io) \
+       (*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
+
+#define XFS_SETSIZE(mp, io, newsize) \
+       (*(mp)->m_io_ops.xfs_setsize_func)((io)->io_obj, newsize)
+
+#define XFS_LASTBYTE(mp, io) \
+       (*(mp)->m_io_ops.xfs_lastbyte)((io)->io_obj)
+
+
+typedef struct xfs_mount {             /* per-mount in-core state; this definition sits in the __KERNEL__ branch of the header */
+       bhv_desc_t              m_bhv;          /* vfs xfs behavior */
+       xfs_tid_t               m_tid;          /* next unused tid for fs */
+       AIL_LOCK_T              m_ail_lock;     /* fs AIL mutex */
+       xfs_ail_entry_t         m_ail;          /* fs active log item list */
+       uint                    m_ail_gen;      /* fs AIL generation count */
+       xfs_sb_t                m_sb;           /* copy of fs superblock */
+       lock_t                  m_sb_lock;      /* sb counter mutex */
+       struct xfs_buf          *m_sb_bp;       /* buffer for superblock */
+       char                    *m_fsname;      /* filesystem name */
+       int                     m_fsname_len;   /* strlen of fs name */
+       int                     m_bsize;        /* fs logical block size */
+       xfs_agnumber_t          m_agfrotor;     /* last ag where space found */
+       xfs_agnumber_t          m_agirotor;     /* last ag dir inode alloced */
+       int                     m_ihsize;       /* size of next field */
+       struct xfs_ihash        *m_ihash;       /* fs private inode hash table*/
+       struct xfs_inode        *m_inodes;      /* active inode list */
+       mutex_t                 m_ilock;        /* inode list mutex */
+       uint                    m_ireclaims;    /* count of calls to reclaim*/
+       uint                    m_readio_log;   /* min read size log bytes */
+       uint                    m_readio_blocks; /* min read size blocks */
+       uint                    m_writeio_log;  /* min write size log bytes */
+       uint                    m_writeio_blocks; /* min write size blocks */
+       void                    *m_log;         /* log specific stuff */
+       int                     m_logbufs;      /* number of log buffers */
+       int                     m_logbsize;     /* size of each log buffer */
+       uint                    m_rsumlevels;   /* rt summary levels */
+       uint                    m_rsumsize;     /* size of rt summary, bytes */
+       struct xfs_inode        *m_rbmip;       /* pointer to bitmap inode */
+       struct xfs_inode        *m_rsumip;      /* pointer to summary inode */
+       struct xfs_inode        *m_rootip;      /* pointer to root directory */
+       struct xfs_quotainfo    *m_quotainfo;   /* disk quota information */
+       buftarg_t               m_ddev_targ;    /* data device target (embedded, not a pointer) */
+       buftarg_t               m_logdev_targ;  /* log device target (embedded) */
+       buftarg_t               m_rtdev_targ;   /* rt device target (embedded) */
+       buftarg_t               *m_ddev_targp;  /* saves taking the address */
+#define m_dev          m_ddev_targ.dev
+#define m_logdev       m_logdev_targ.dev
+#define m_rtdev                m_rtdev_targ.dev
+       __uint8_t               m_dircook_elog; /* log d-cookie entry bits */
+       __uint8_t               m_blkbit_log;   /* blocklog + NBBY */
+       __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
+       __uint8_t               m_agno_log;     /* log #ag's */
+       __uint8_t               m_agino_log;    /* #bits for agino in inum */
+       __uint8_t               m_nreadaheads;  /* #readahead buffers */
+       __uint16_t              m_inode_cluster_size;/* min inode buf size */
+       uint                    m_blockmask;    /* sb_blocksize-1 */
+       uint                    m_blockwsize;   /* sb_blocksize in words */
+       uint                    m_blockwmask;   /* blockwsize-1 */
+       uint                    m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */
+       uint                    m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */
+       uint                    m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */
+       uint                    m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
+       uint                    m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
+       uint                    m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+       uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
+       uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
+       uint                    m_in_maxlevels; /* XFS_IN_MAXLEVELS */
+       struct xfs_perag        *m_perag;       /* per-ag accounting info */
+       mrlock_t                m_peraglock;    /* lock for m_perag (pointer) */
+       sema_t                  m_growlock;     /* growfs mutex */
+       int                     m_fixedfsid[2]; /* unchanged for life of FS */
+       uint                    m_dmevmask;     /* DMI events for this FS */
+       uint                    m_flags;        /* global mount flags */
+       uint                    m_attroffset;   /* inode attribute offset */
+       int                     m_da_node_ents; /* how many entries in danode */
+       int                     m_ialloc_inos;  /* inodes in inode allocation */
+       int                     m_ialloc_blks;  /* blocks in inode allocation */
+       int                     m_litino;       /* size of inode union area */
+       int                     m_inoalign_mask;/* mask sb_inoalignmt if used */
+       uint                    m_qflags;       /* quota status flags */
+       xfs_trans_reservations_t m_reservations;/* precomputed res values */
+       __uint64_t              m_maxicount;    /* maximum inode count */
+       __uint64_t              m_resblks;      /* total reserved blocks */
+       __uint64_t              m_resblks_avail;/* available reserved blocks */
+#if XFS_BIG_FILESYSTEMS
+       xfs_ino_t               m_inoadd;       /* add value for ino64_offset */
+#endif
+       int                     m_dalign;       /* stripe unit */
+       int                     m_swidth;       /* stripe width */
+       int                     m_sinoalign;    /* stripe unit inode alignmnt */
+       int                     m_attr_magicpct;/* 37% of the blocksize */
+       int                     m_dir_magicpct; /* 37% of the dir blocksize */
+       __uint8_t               m_mk_sharedro;  /* mark shared ro on unmount */
+        __uint8_t               m_inode_quiesce;/* call quiesce on new inodes.
+                                                   field governed by m_ilock */
+       __uint8_t               m_dirversion;   /* 1 or 2 */
+       xfs_dirops_t            m_dirops;       /* table of dir funcs */
+       int                     m_dirblksize;   /* directory block sz--bytes */
+       int                     m_dirblkfsbs;   /* directory block sz--fsbs */
+       xfs_dablk_t             m_dirdatablk;   /* blockno of dir data v2 */
+       xfs_dablk_t             m_dirleafblk;   /* blockno of dir non-data v2 */
+       xfs_dablk_t             m_dirfreeblk;   /* blockno of dirfreeindex v2 */
+       int                     m_chsize;       /* size of next field */
+       struct xfs_chash        *m_chash;       /* fs private inode per-cluster
+                                                * hash table */
+       struct xfs_ioops        m_io_ops;       /* vector of I/O ops */
+        struct xfs_expinfo      *m_expinfo;     /* info to export to other 
+                                                   cells. */
+       __uint64_t              m_shadow_pinmask;
+                                               /* which bits matter in rpc
+                                                  log item pin masks */
+       uint                    m_cxfstype;     /* mounted shared, etc. */
+} xfs_mount_t;
+
+/*
+ * Flags for m_flags.  Each value is a distinct bit; 0x00000008 and 0x00000100 are currently unused.
+ */
+#define        XFS_MOUNT_WSYNC         0x00000001      /* for nfs - all metadata ops
+                                                  must be synchronous except
+                                                  for space allocations */
+#if XFS_BIG_FILESYSTEMS
+#define        XFS_MOUNT_INO64         0x00000002
+#endif
+#define XFS_MOUNT_ROOTQCHECK   0x00000004
+                            /* 0x00000008      -- currently unused */
+#define XFS_MOUNT_FS_SHUTDOWN  0x00000010      /* atomic stop of all filesystem
+                                                  operations, typically for
+                                                  disk errors in metadata */
+#define XFS_MOUNT_NOATIME      0x00000020      /* don't modify inode access
+                                                  times on reads */
+#define XFS_MOUNT_RETERR       0x00000040      /* return alignment errors to
+                                                   user */
+#define XFS_MOUNT_NOALIGN      0x00000080      /* turn off stripe alignment 
+                                                  allocations */
+                            /* 0x00000100      -- currently unused */
+#define XFS_MOUNT_REGISTERED    0x00000200      /* registered with cxfs master
+                                                   cell logic */
+#define XFS_MOUNT_NORECOVERY           0x00000400      /* no recovery - dirty fs */
+#define XFS_MOUNT_SHARED       0x00000800      /* shared mount */
+#define XFS_MOUNT_DFLT_IOSIZE          0x00001000      /* set default i/o size */
+#define XFS_MOUNT_OSYNCISDSYNC         0x00002000      /* treat o_sync like o_dsync */
+
+/*
+ * Flags for m_cxfstype
+ */
+#define XFS_CXFS_NOT           0x00000001      /* local mount */
+#define XFS_CXFS_SERVER                0x00000002      /* we're the CXFS server */
+#define XFS_CXFS_CLIENT                0x00000004      /* We're a CXFS client */
+#define XFS_CXFS_REC_ENABLED   0x00000008      /* recovery is enabled */
+
+#define XFS_FORCED_SHUTDOWN(mp)        ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) /* nonzero (the flag bit itself, not 1) when XFS_MOUNT_FS_SHUTDOWN is set */
+
+/*
+ * Default minimum read and write sizes, given as log2 of the size in bytes (15 = 32K, 16 = 64K).
+ */
+#define        XFS_READIO_LOG_SMALL    15      /* <= 32MB memory */
+#define        XFS_WRITEIO_LOG_SMALL   15
+#define        XFS_READIO_LOG_LARGE    16      /* > 32MB memory */
+#define        XFS_WRITEIO_LOG_LARGE   16
+
+/*
+ * max and min values for UIO and mount-option defined I/O sizes
+ * min value can't be less than a page.  Lower limit for 4K machines
+ * is 8K because that's what was tested.  Values are log2 of the size in bytes; any _PAGESZ other than 4096, 8192 or 16384 is a compile error.
+ */
+#define XFS_MAX_IO_LOG         16      /* 64K */
+
+#if (_PAGESZ == 16384) || (_PAGESZ == 8192)
+#define XFS_MIN_IO_LOG         14      /* 16K */
+#elif _PAGESZ == 4096
+#define XFS_MIN_IO_LOG         13      /* 8K */
+#else
+#error "Unknown page size"
+#endif
+
+
+/*
+ * Synchronous read and write sizes.  This should be
+ * better for NFSv2 wsync filesystems.
+ */
+#define        XFS_WSYNC_READIO_LOG    15      /* 32K */
+#define        XFS_WSYNC_WRITEIO_LOG   14      /* 16K */
+
+/* 
+ * Flags sent to xfs_force_shutdown (its int argument); each value is a distinct bit.
+ */
+#define XFS_METADATA_IO_ERROR  0x1
+#define XFS_LOG_IO_ERROR       0x2
+#define XFS_FORCE_UMOUNT       0x4
+#define XFS_CORRUPT_INCORE     0x8     /* corrupt in-memory data structures */
+#ifdef CELL_CAPABLE    /* #ifdef, matching the other CELL_CAPABLE guards in this header */
+#define XFS_SHUTDOWN_REMOTE_REQ        0x10    /* shutdown req came from remote cell */
+#endif
+
+/*
+ * xflags for xfs_syncsub
+ */
+#define XFS_XSYNC_RELOC                0x01
+
+/*
+ * Flags for xfs_mountfs (0x04 is not assigned in this list)
+ */
+#define XFS_MFSI_SECOND         0x01   /* Is a cxfs secondary mount -- skip */
+                                       /* stuff which should only be done */
+                                       /* once. */
+#define XFS_MFSI_CLIENT         0x02    /* Is a client -- skip lots of stuff */
+#define XFS_MFSI_NOUNLINK      0x08    /* Skip unlinked inode processing in */
+                                       /* log recovery */
+
+/*
+ * Macros for getting from mount to vfs and back.  Each expands to a function call when XFS_WANT_FUNCS (or XFS_WANT_SPACE plus its XFSSO_* switch) is set, otherwise to the inline expression.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MTOVFS)
+struct vfs *xfs_mtovfs(xfs_mount_t *mp);
+#define        XFS_MTOVFS(mp)          xfs_mtovfs(mp)
+#else
+#define        XFS_MTOVFS(mp)          (bhvtovfs(&(mp)->m_bhv))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOM)
+xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp);
+#define        XFS_BHVTOM(bdp) xfs_bhvtom(bdp)
+#else
+#define        XFS_BHVTOM(bdp)         ((xfs_mount_t *)BHV_PDATA(bdp))
+#endif
+
+/*
+ * Moved here from xfs_ag.h to avoid reordering header files.  Kernel forms: out-of-line functions when XFS_WANT_FUNCS (or XFS_WANT_SPACE plus the XFSSO_* switch) is set, otherwise the static inlines below.
+ */
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGNO)
+xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d);
+#define XFS_DADDR_TO_AGNO(mp,d)         xfs_daddr_to_agno(mp,d)
+#else
+
+static inline xfs_agnumber_t XFS_DADDR_TO_AGNO(xfs_mount_t *mp, xfs_daddr_t d)
+{
+        d = XFS_BB_TO_FSBT(mp, d);
+        do_div(d, mp->m_sb.sb_agblocks);       /* NOTE(review): relies on do_div leaving the quotient in d - confirm */
+        return (xfs_agnumber_t) d;
+}
+
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGBNO)
+xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d);
+#define XFS_DADDR_TO_AGBNO(mp,d)        xfs_daddr_to_agbno(mp,d)
+#else
+
+static inline xfs_agblock_t XFS_DADDR_TO_AGBNO(xfs_mount_t *mp, xfs_daddr_t d)
+{
+        d = XFS_BB_TO_FSBT(mp, d);
+        return (xfs_agblock_t) do_div(d, mp->m_sb.sb_agblocks);        /* NOTE(review): relies on do_div returning the remainder - confirm */
+}
+
+#endif
+
+/*
+ * This structure is for use by the xfs_mod_incore_sb_batch() routine, which takes a pointer to these plus a uint and an int.
+ */
+typedef struct xfs_mod_sb {
+       xfs_sb_field_t  msb_field;      /* Field to modify, see below */
+       int             msb_delta;      /* change to make to the specified field (signed int) */
+} xfs_mod_sb_t;
+
+#define        XFS_MOUNT_ILOCK(mp)     mutex_lock(&((mp)->m_ilock), PINOD)     /* takes the inode list mutex */
+#define        XFS_MOUNT_IUNLOCK(mp)   mutex_unlock(&((mp)->m_ilock))
+#define        XFS_SB_LOCK(mp)         mutex_spinlock(&(mp)->m_sb_lock)        /* NOTE(review): result is presumably the s later given to XFS_SB_UNLOCK - confirm */
+#define        XFS_SB_UNLOCK(mp,s)     mutex_spinunlock(&(mp)->m_sb_lock,(s))
+
+void           xfs_mod_sb(xfs_trans_t *, __int64_t);
+xfs_mount_t    *xfs_mount_init(void);
+void           xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
+int            xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int); /* NOTE(review): trailing int presumably carries XFS_MFSI_* flags - confirm */
+int            xfs_mountargs(struct mounta *, struct xfs_args *);
+
+int            xfs_unmountfs(xfs_mount_t *, int, struct cred *);
+void           xfs_unmountfs_close(xfs_mount_t *, int, struct cred *);
+int             xfs_unmountfs_writesb(xfs_mount_t *);
+int             xfs_unmount_flush(xfs_mount_t *, int);
+int            xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+int            xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int);
+int            xfs_readsb(xfs_mount_t *mp, dev_t);
+struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+void            xfs_freesb(xfs_mount_t *);
+void           xfs_force_shutdown(struct xfs_mount *, int);    /* int: the "Flags sent to xfs_force_shutdown" values defined in this header */
+int            xfs_syncsub(xfs_mount_t *, int, int, int *);
+void           xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t);
+extern struct vfsops xfs_vfsops;
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_MOUNT_H__ */
diff --git a/include/xfs_quota.h b/include/xfs_quota.h
new file mode 100644 (file)
index 0000000..794b90e
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_QUOTA_H__
+#define __XFS_QUOTA_H__
+
+/*
+ * We use only 16-bit prids in the inode, not the 64-bit version in the proc.
+ * uid_t is hard-coded to 32 bits in the inode. Hence, an 'id' in a dquot is
+ * 32 bits.
+ */
+typedef __int32_t      xfs_dqid_t;
+/*
+ * Even though users may not have quota limits occupying all 64 bits,
+ * they may need 64-bit accounting. Hence, 64-bit quota counters
+ * and quota limits. This is a waste in the common case, but heh ...
+ */
+typedef __uint64_t     xfs_qcnt_t;
+typedef __uint16_t      xfs_qwarncnt_t;
+
+/* 
+ * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
+ */
+#define XFS_UQUOTA_ACCT        0x0001  /* user quota accounting ON */
+#define XFS_UQUOTA_ENFD        0x0002  /* user quota limits enforced */
+#define XFS_UQUOTA_CHKD        0x0004  /* quotacheck run on usr quotas */
+#define XFS_PQUOTA_ACCT        0x0008  /* project quota accounting ON */
+#define XFS_PQUOTA_ENFD        0x0010  /* proj quota limits enforced */
+#define XFS_PQUOTA_CHKD        0x0020  /* quotacheck run on prj quotas */
+
+/* 
+ * Incore only flags for quotaoff - these bits get cleared when quota(s)
+ * are in the process of getting turned off. These flags are in m_qflags but
+ * never in sb_qflags.
+ */
+#define XFS_UQUOTA_ACTIVE      0x0040  /* uquotas active; cleared during quotaoff */
+#define XFS_PQUOTA_ACTIVE      0x0080  /* pquotas active; cleared during quotaoff */
+
+/*
+ * Typically, we turn quotas off if we weren't explicitly asked to 
+ * mount quotas. This is the mount option not to do that.
+ * This option is handy in the miniroot, when trying to mount /root.
+ * We can't really know what's in /etc/fstab until /root is already mounted!
+ * This stops quotas getting turned off in the root filesystem everytime
+ * the system boots up a miniroot.
+ */
+#define XFS_QUOTA_MAYBE                0x0100 /* Turn quotas on if SB has quotas on */
+
+/*
+ * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
+ * quota will not be switched off as long as that inode lock is held.
+ */
+#define XFS_IS_QUOTA_ON(mp)    ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
+                                                  XFS_PQUOTA_ACTIVE))
+#define XFS_IS_UQUOTA_ON(mp)   ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
+#define XFS_IS_PQUOTA_ON(mp)   ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
+
+/*
+ * Flags to tell various functions what to do. Not all of these are meaningful
+ * to a single function. None of these XFS_QMOPT_* flags are meant to have
+ * persistent values (ie. their values can and will change between versions)
+ */
+#define XFS_QMOPT_DQLOCK       0x0000001 /* dqlock */
+#define XFS_QMOPT_DQALLOC      0x0000002 /* alloc dquot ondisk if needed */
+#define XFS_QMOPT_UQUOTA       0x0000004 /* user dquot requested */
+#define XFS_QMOPT_PQUOTA       0x0000008 /* proj dquot requested */
+#define XFS_QMOPT_FORCE_RES    0x0000010 /* ignore quota limits */
+#define XFS_QMOPT_DQSUSER      0x0000020 /* don't cache super users dquot */
+#define XFS_QMOPT_SBVERSION    0x0000040 /* change superblock version num */
+#define XFS_QMOPT_QUOTAOFF     0x0000080 /* quotas are being turned off */
+#define XFS_QMOPT_UMOUNTING    0x0000100 /* filesys is being unmounted */
+#define XFS_QMOPT_DOLOG                0x0000200 /* log buf changes (in quotacheck) */
+#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if necessary */
+#define XFS_QMOPT_ILOCKED      0x0000800 /* inode is already locked (excl) */
+#define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot, if damaged. */
+
+/* 
+ * flags to xfs_trans_mod_dquot to indicate which field needs to be
+ * modified.
+ */
+#define XFS_QMOPT_RES_REGBLKS  0x0010000
+#define XFS_QMOPT_RES_RTBLKS   0x0020000
+#define XFS_QMOPT_BCOUNT       0x0040000
+#define XFS_QMOPT_ICOUNT       0x0080000
+#define XFS_QMOPT_RTBCOUNT     0x0100000
+#define XFS_QMOPT_DELBCOUNT    0x0200000
+#define XFS_QMOPT_DELRTBCOUNT  0x0400000
+#define XFS_QMOPT_RES_INOS     0x0800000
+
+/*
+ * flags for dqflush and dqflush_all.
+ */
+#define XFS_QMOPT_SYNC         0x1000000
+#define XFS_QMOPT_ASYNC                0x2000000
+#define XFS_QMOPT_DELWRI       0x4000000
+
+/* 
+ * flags to xfs_trans_mod_dquot.
+ */
+#define XFS_TRANS_DQ_RES_BLKS  XFS_QMOPT_RES_REGBLKS
+#define XFS_TRANS_DQ_RES_RTBLKS        XFS_QMOPT_RES_RTBLKS
+#define XFS_TRANS_DQ_RES_INOS  XFS_QMOPT_RES_INOS
+#define XFS_TRANS_DQ_BCOUNT    XFS_QMOPT_BCOUNT
+#define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT
+#define XFS_TRANS_DQ_ICOUNT    XFS_QMOPT_ICOUNT
+#define XFS_TRANS_DQ_RTBCOUNT  XFS_QMOPT_RTBCOUNT
+#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
+
+
+#define XFS_QMOPT_QUOTALL      (XFS_QMOPT_UQUOTA|XFS_QMOPT_PQUOTA)
+#define XFS_QMOPT_RESBLK_MASK  (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
+
+/*
+ * This check is typically done without holding the inode lock;
+ * that may seem racy, but it is harmless in the context in which it is used.
+ * The inode cannot go inactive as long as a reference is kept, and
+ * therefore if dquot(s) were attached, they'll stay consistent.
+ * If, for example, the ownership of the inode changes while
+ * we didn't have the inode locked, the appropriate dquot(s) will be
+ * attached atomically.
+ */
+#define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\
+                                    (ip)->i_udquot == NULL) || \
+                                   (XFS_IS_PQUOTA_ON(mp) && \
+                                    (ip)->i_pdquot == NULL))
+
+/*
+ * True when a quotacheck is still needed: a quota type is active in
+ * m_qflags but its XFS_*QUOTA_CHKD bit is not set in the superblock's
+ * sb_qflags.  Every use of 'mp' is parenthesized so that any pointer
+ * expression can be passed as the argument.
+ */
+#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \
+                                    ((mp)->m_sb.sb_qflags & \
+                                     XFS_UQUOTA_CHKD) == 0) || \
+                                   (XFS_IS_PQUOTA_ON(mp) && \
+                                    ((mp)->m_sb.sb_qflags & \
+                                     XFS_PQUOTA_CHKD) == 0))
+
+#define XFS_MOUNT_QUOTA_ALL    (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+                                XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
+                                XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD)
+#define XFS_MOUNT_QUOTA_MASK   (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
+                                XFS_PQUOTA_ACTIVE)
+
+#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
+
+
+#ifdef __KERNEL__
+/*
+ * External Interface to the XFS disk quota subsystem.
+ */
+struct bhv_desc;
+struct  vfs;
+struct  xfs_disk_dquot;
+struct  xfs_dqhash;
+struct  xfs_dquot;
+struct  xfs_inode;
+struct  xfs_mount;
+struct  xfs_trans;
+
+/*
+ * Quota Manager Interface.
+ */
+extern struct xfs_qm   *xfs_qm_init(void);
+extern void            xfs_qm_destroy(struct xfs_qm *);
+extern int             xfs_qm_dqflush_all(struct xfs_mount *, int);
+extern int             xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int             xfs_qm_dqpurge_all(struct xfs_mount *, uint);
+extern void            xfs_qm_mount_quotainit(struct xfs_mount *, uint);
+extern void            xfs_qm_unmount_quotadestroy(struct xfs_mount *);
+extern int             xfs_qm_mount_quotas(struct xfs_mount *);
+extern int             xfs_qm_unmount_quotas(struct xfs_mount *);
+extern void            xfs_qm_dqdettach_inode(struct xfs_inode *);
+extern int             xfs_qm_sync(struct xfs_mount *, short);
+
+
+/*
+ * system call interface
+ */
+extern int             xfs_quotactl(xfs_mount_t *, struct vfs *, int, int,
+                                    int, xfs_caddr_t);
+
+/*
+ * dquot interface.
+ */
+extern void            xfs_dqlock(struct xfs_dquot *);
+extern void            xfs_dqunlock(struct xfs_dquot *);
+extern void            xfs_dqunlock_nonotify(struct xfs_dquot *);
+extern void            xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+extern void            xfs_qm_dqput(struct xfs_dquot *);
+extern void            xfs_qm_dqrele(struct xfs_dquot *);
+extern xfs_dqid_t      xfs_qm_dqid(struct xfs_dquot *);
+extern int             xfs_qm_dqget(struct xfs_mount *, 
+                                    struct xfs_inode *, xfs_dqid_t,
+                                     uint, uint, struct xfs_dquot **);
+extern int             xfs_qm_dqcheck(struct xfs_disk_dquot *, 
+                                      xfs_dqid_t, uint, uint, char *);
+
+/*
+ * Vnodeops specific code that should actually be _in_ xfs_vnodeops.c, but
+ * is here because it's nicer to keep vnodeops (therefore, XFS) lean 
+ * and clean.
+ */
+extern struct xfs_dquot *      xfs_qm_vop_chown(struct xfs_trans *, 
+                                                struct xfs_inode *, 
+                                                struct xfs_dquot **,
+                                                struct xfs_dquot *);
+extern int             xfs_qm_vop_dqalloc(struct xfs_mount *,
+                                          struct xfs_inode *,
+                                          uid_t, xfs_prid_t, uint,
+                                          struct xfs_dquot     **,
+                                          struct xfs_dquot     **);
+
+extern int             xfs_qm_vop_chown_dqalloc(struct xfs_mount *,
+                                                struct xfs_inode *,
+                                                int, uid_t, xfs_prid_t,
+                                                struct xfs_dquot **,
+                                                struct xfs_dquot **);
+
+extern int             xfs_qm_vop_chown_reserve(struct xfs_trans *,
+                                                struct xfs_inode *,
+                                                struct xfs_dquot *,
+                                                struct xfs_dquot *,
+                                                uint);
+
+extern int             xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern void            xfs_qm_vop_dqattach_and_dqmod_newinode(
+                                               struct xfs_trans *,
+                                               struct xfs_inode *,
+                                               struct xfs_dquot *,     
+                                               struct xfs_dquot *);
+
+
+/*
+ * Dquot Transaction interface
+ */
+extern void            xfs_trans_alloc_dqinfo(struct xfs_trans *);
+extern void            xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void            xfs_trans_dup_dqinfo(struct xfs_trans *, 
+                                            struct xfs_trans *);
+extern void            xfs_trans_mod_dquot(struct xfs_trans *, 
+                                           struct xfs_dquot *,
+                                           uint, long);
+extern int             xfs_trans_mod_dquot_byino(struct xfs_trans *, 
+                                                 struct xfs_inode *,
+                                                 uint, long);
+extern void            xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void            xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+
+extern int             xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+                                                     struct xfs_inode *,
+                                                     long, long, uint);
+
+
+extern int             xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+                                                        struct xfs_dquot *,
+                                                        struct xfs_dquot *,
+                                                        long, long, uint);
+extern void            xfs_trans_log_dquot(struct xfs_trans *,
+                                           struct xfs_dquot *);
+extern void            xfs_trans_dqjoin(struct xfs_trans *,
+                                        struct xfs_dquot *);
+extern void            xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
+
+/*
+ * Regular disk block quota reservations.
+ *
+ * Thin wrappers that forward to xfs_trans_reserve_quota_nblks() and
+ * xfs_trans_reserve_quota_bydquots() with XFS_QMOPT_RES_REGBLKS set.
+ * The "unreserve" forms pass the negated counts.  The flags argument 'f'
+ * is parenthesized before being or'ed with XFS_QMOPT_RES_REGBLKS so that
+ * an expression argument (e.g. a conditional) keeps its meaning.
+ */
+#define        xfs_trans_reserve_blkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_REGBLKS)
+
+#define        xfs_trans_unreserve_blkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_REGBLKS)
+
+#define        xfs_trans_reserve_quota(tp, udq, pdq, nb, ni, f) \
+xfs_trans_reserve_quota_bydquots(tp, udq, pdq, nb, ni, (f)|XFS_QMOPT_RES_REGBLKS)
+
+#define        xfs_trans_unreserve_quota(tp, ud, pd, b, i, f) \
+xfs_trans_reserve_quota_bydquots(tp, ud, pd, -(b), -(i), (f)|XFS_QMOPT_RES_REGBLKS)
+
+/*
+ * Realtime disk block quota reservations.
+ *
+ * Same wrappers as for regular blocks, but with XFS_QMOPT_RES_RTBLKS set.
+ * xfs_trans_reserve_quota_bydquots() as declared in this header takes
+ * (tp, udq, pdq, blocks, inodes, flags), so:
+ *  - the 'mp' parameter of the reserve macros is accepted for the benefit
+ *    of existing callers but is not forwarded;
+ *  - the inode count is always passed as 0, since only realtime blocks
+ *    are being reserved or released here;
+ *  - 'f' is parenthesized before being or'ed with the RT flag.
+ */
+#define        xfs_trans_reserve_rtblkquota(mp, tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_RTBLKS)
+
+#define        xfs_trans_unreserve_rtblkquota(tp, ip, nblks) \
+xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_RTBLKS)
+
+#define        xfs_trans_reserve_rtquota(mp, tp, uq, pq, blks, f) \
+xfs_trans_reserve_quota_bydquots(tp, uq, pq, blks, 0, (f)|XFS_QMOPT_RES_RTBLKS)
+
+#define        xfs_trans_unreserve_rtquota(tp, uq, pq, blks) \
+xfs_trans_reserve_quota_bydquots(tp, uq, pq, -(blks), 0, XFS_QMOPT_RES_RTBLKS)
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_QUOTA_H__ */
diff --git a/include/xfs_rtalloc.h b/include/xfs_rtalloc.h
new file mode 100644 (file)
index 0000000..be2b88a
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_RTALLOC_H__
+#define        __XFS_RTALLOC_H__
+
+struct xfs_mount;
+struct xfs_trans;
+
+/* Min and max rt extent sizes, specified in bytes */
+#define        XFS_MAX_RTEXTSIZE       (1024 * 1024 * 1024)    /* 1GB */
+#define        XFS_DFL_RTEXTSIZE       (64 * 1024)             /* 64KB */
+#define        XFS_MIN_RTEXTSIZE       (4 * 1024)              /* 4KB */
+
+/*
+ * Constants for bit manipulations.
+ */
+#define        XFS_NBBYLOG     3               /* log2(NBBY) */
+#define        XFS_WORDLOG     2               /* log2(sizeof(xfs_rtword_t)) */
+#define        XFS_NBWORDLOG   (XFS_NBBYLOG + XFS_WORDLOG)
+#define        XFS_NBWORD      (1 << XFS_NBWORDLOG)
+#define        XFS_WORDMASK    ((1 << XFS_WORDLOG) - 1)
+
+#define        XFS_BLOCKSIZE(mp)       ((mp)->m_sb.sb_blocksize)
+#define        XFS_BLOCKMASK(mp)       ((mp)->m_blockmask)
+#define        XFS_BLOCKWSIZE(mp)      ((mp)->m_blockwsize)
+#define        XFS_BLOCKWMASK(mp)      ((mp)->m_blockwmask)
+
+/*
+ * Summary and bit manipulation macros.
+ */
+#define        XFS_SUMOFFS(mp,ls,bb)   ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
+#define        XFS_SUMOFFSTOBLOCK(mp,s)        \
+       (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
+#define        XFS_SUMPTR(mp,bp,so)    \
+       ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+               (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
+
+#define        XFS_BITTOBLOCK(mp,bi)   ((bi) >> (mp)->m_blkbit_log)
+#define        XFS_BLOCKTOBIT(mp,bb)   ((bb) << (mp)->m_blkbit_log)
+#define        XFS_BITTOWORD(mp,bi)    \
+       ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
+
+/*
+ * Minimum/maximum of two values.  Each argument appears twice in the
+ * expansion, so do not pass expressions with side effects.
+ */
+#define        XFS_RTMIN(a,b)  ((a) < (b) ? (a) : (b))
+#define        XFS_RTMAX(a,b)  ((a) > (b) ? (a) : (b))
+
+#define        XFS_RTLOBIT(w)  xfs_lowbit32(w)
+#define        XFS_RTHIBIT(w)  xfs_highbit32(w)
+
+#if XFS_BIG_FILESYSTEMS
+#define        XFS_RTBLOCKLOG(b)       xfs_highbit64(b)
+#else
+#define        XFS_RTBLOCKLOG(b)       xfs_highbit32(b)
+#endif
+
+/*
+ * Function prototypes for exported functions.
+ */
+
+/*
+ * Allocate an extent in the realtime subvolume, with the usual allocation
+ * parameters.  The length units are all in realtime extents, as is the
+ * result block number.
+ */
+int                                    /* error */
+xfs_rtallocate_extent(
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_rtblock_t           bno,    /* starting block number to allocate */
+       xfs_extlen_t            minlen, /* minimum length to allocate */
+       xfs_extlen_t            maxlen, /* maximum length to allocate */
+       xfs_extlen_t            *len,   /* out: actual length allocated */
+       xfs_alloctype_t         type,   /* allocation type XFS_ALLOCTYPE... */
+       int                     wasdel, /* was a delayed allocation extent */
+       xfs_extlen_t            prod,   /* extent product factor */
+       xfs_rtblock_t           *rtblock); /* out: start block allocated */
+
+/*
+ * Free an extent in the realtime subvolume.  Length is expressed in
+ * realtime extents, as is the block number.
+ */
+int                                    /* error */
+xfs_rtfree_extent(
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_rtblock_t           bno,    /* starting block number to free */
+       xfs_extlen_t            len);   /* length of extent freed */
+
+/*
+ * Initialize realtime fields in the mount structure.
+ */
+int                                    /* error */
+xfs_rtmount_init(
+       struct xfs_mount        *mp);   /* file system mount structure */
+
+/*
+ * Get the bitmap and summary inodes into the mount structure 
+ * at mount time.
+ */
+int                                    /* error */
+xfs_rtmount_inodes(
+       struct xfs_mount        *mp);   /* file system mount structure */
+
+/*
+ * Pick an extent for allocation at the start of a new realtime file.
+ * Use the sequence number stored in the atime field of the bitmap inode.
+ * Translate this to a fraction of the rtextents, and return the product
+ * of rtextents and the fraction.
+ * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...
+ */
+int                                    /* error */
+xfs_rtpick_extent(
+       struct xfs_mount        *mp,    /* file system mount point */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_extlen_t            len,    /* allocation length (rtextents) */
+       xfs_rtblock_t           *pick); /* result rt extent */
+
+#ifdef XFSDEBUG
+/*
+ * Debug code: print out the value of a range in the bitmap.
+ */
+void
+xfs_rtprint_range(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_rtblock_t           start,  /* starting block to print */
+       xfs_extlen_t            len);   /* length to print */
+
+/*
+ * Debug code: print the summary file.
+ */
+void
+xfs_rtprint_summary(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp);   /* transaction pointer */
+#endif /* XFSDEBUG */
+
+#endif /* __XFS_RTALLOC_H__ */
diff --git a/include/xfs_sb.h b/include/xfs_sb.h
new file mode 100644 (file)
index 0000000..6526d10
--- /dev/null
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SB_H__
+#define        __XFS_SB_H__
+
+/*
+ * Super block
+ * Fits into a 512-byte buffer at daddr_t 0 of each allocation group.
+ * Only the first of these is ever updated except during growfs.
+ */
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define        XFS_SB_MAGIC            0x58465342      /* 'XFSB' */
+#define        XFS_SB_VERSION_1        1               /* 5.3, 6.0.1, 6.1 */
+#define        XFS_SB_VERSION_2        2               /* 6.2 - attributes */
+#define        XFS_SB_VERSION_3        3               /* 6.2 - new inode version */
+#define        XFS_SB_VERSION_4        4               /* 6.2+ - bitmask version */
+#define        XFS_SB_VERSION_NUMBITS          0x000f
+#define        XFS_SB_VERSION_ALLFBITS         0xfff0
+#define        XFS_SB_VERSION_SASHFBITS        0xf000
+#define        XFS_SB_VERSION_REALFBITS        0x0ff0
+#define        XFS_SB_VERSION_ATTRBIT          0x0010
+#define        XFS_SB_VERSION_NLINKBIT         0x0020
+#define        XFS_SB_VERSION_QUOTABIT         0x0040
+#define        XFS_SB_VERSION_ALIGNBIT         0x0080
+#define        XFS_SB_VERSION_DALIGNBIT        0x0100
+#define        XFS_SB_VERSION_SHAREDBIT        0x0200
+#define        XFS_SB_VERSION_EXTFLGBIT        0x1000
+#define        XFS_SB_VERSION_DIRV2BIT         0x2000
+#define        XFS_SB_VERSION_OKSASHFBITS      \
+       (XFS_SB_VERSION_EXTFLGBIT | \
+        XFS_SB_VERSION_DIRV2BIT)
+#define        XFS_SB_VERSION_OKREALFBITS      \
+       (XFS_SB_VERSION_ATTRBIT | \
+        XFS_SB_VERSION_NLINKBIT | \
+        XFS_SB_VERSION_QUOTABIT | \
+        XFS_SB_VERSION_ALIGNBIT | \
+        XFS_SB_VERSION_DALIGNBIT | \
+        XFS_SB_VERSION_SHAREDBIT)
+#define        XFS_SB_VERSION_OKSASHBITS       \
+       (XFS_SB_VERSION_NUMBITS | \
+        XFS_SB_VERSION_REALFBITS | \
+        XFS_SB_VERSION_OKSASHFBITS)
+#define        XFS_SB_VERSION_OKREALBITS       \
+       (XFS_SB_VERSION_NUMBITS | \
+        XFS_SB_VERSION_OKREALFBITS | \
+        XFS_SB_VERSION_OKSASHFBITS)
+/*
+ * Version word for a new filesystem: plain XFS_SB_VERSION_1 when none of
+ * the four options is requested, otherwise XFS_SB_VERSION_4 with one
+ * feature bit or'ed in for every nonzero argument.
+ */
+#define        XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2)       \
+       (!((ia) || (dia) || (extflag) || (dirv2)) ? \
+               XFS_SB_VERSION_1 : \
+               (XFS_SB_VERSION_4 | \
+                ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
+                ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
+                ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
+                ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0)))
+
+typedef struct xfs_sb
+{
+       __uint32_t      sb_magicnum;    /* magic number == XFS_SB_MAGIC */
+       __uint32_t      sb_blocksize;   /* logical block size, bytes */
+       xfs_drfsbno_t   sb_dblocks;     /* number of data blocks */
+       xfs_drfsbno_t   sb_rblocks;     /* number of realtime blocks */
+       xfs_drtbno_t    sb_rextents;    /* number of realtime extents */
+       uuid_t          sb_uuid;        /* file system unique id */
+       xfs_dfsbno_t    sb_logstart;    /* starting block of log if internal */
+       xfs_ino_t       sb_rootino;     /* root inode number */
+       xfs_ino_t       sb_rbmino;      /* bitmap inode for realtime extents */
+       xfs_ino_t       sb_rsumino;     /* summary inode for rt bitmap */
+       xfs_agblock_t   sb_rextsize;    /* realtime extent size, blocks */
+       xfs_agblock_t   sb_agblocks;    /* size of an allocation group */
+       xfs_agnumber_t  sb_agcount;     /* number of allocation groups */
+       xfs_extlen_t    sb_rbmblocks;   /* number of rt bitmap blocks */
+       xfs_extlen_t    sb_logblocks;   /* number of log blocks */
+       __uint16_t      sb_versionnum;  /* header version == XFS_SB_VERSION */
+       __uint16_t      sb_sectsize;    /* volume sector size, bytes */
+       __uint16_t      sb_inodesize;   /* inode size, bytes */
+       __uint16_t      sb_inopblock;   /* inodes per block */
+       char            sb_fname[12];   /* file system name */
+       __uint8_t       sb_blocklog;    /* log2 of sb_blocksize */
+       __uint8_t       sb_sectlog;     /* log2 of sb_sectsize */
+       __uint8_t       sb_inodelog;    /* log2 of sb_inodesize */
+       __uint8_t       sb_inopblog;    /* log2 of sb_inopblock */
+       __uint8_t       sb_agblklog;    /* log2 of sb_agblocks (rounded up) */
+       __uint8_t       sb_rextslog;    /* log2 of sb_rextents */
+       __uint8_t       sb_inprogress;  /* mkfs is in progress, don't mount */
+       __uint8_t       sb_imax_pct;    /* max % of fs for inode space */
+                                       /* statistics */
+       /*
+        * These fields must remain contiguous.  If you really
+        * want to change their layout, make sure you fix the
+        * code in xfs_trans_apply_sb_deltas().
+        */
+       __uint64_t      sb_icount;      /* allocated inodes */
+       __uint64_t      sb_ifree;       /* free inodes */
+       __uint64_t      sb_fdblocks;    /* free data blocks */
+       __uint64_t      sb_frextents;   /* free realtime extents */
+       /*
+        * End contiguous fields.
+        */
+       xfs_ino_t       sb_uquotino;    /* user quota inode */
+       xfs_ino_t       sb_pquotino;    /* project quota inode */
+       __uint16_t      sb_qflags;      /* quota flags */
+       __uint8_t       sb_flags;       /* misc. flags */
+       __uint8_t       sb_shared_vn;   /* shared version number */
+       xfs_extlen_t    sb_inoalignmt;  /* inode chunk alignment, fsblocks */
+       __uint32_t      sb_unit;        /* stripe or raid unit */
+       __uint32_t      sb_width;       /* stripe or raid width */      
+       __uint8_t       sb_dirblklog;   /* log2 of dir block size (fsbs) */
+        __uint8_t       sb_dummy[7];    /* padding */
+} xfs_sb_t;
+
+/*
+ * Sequence number values for the fields.
+ */
+typedef enum {
+       XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
+       XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
+       XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
+       XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
+       XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
+       XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
+       XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
+       XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
+       XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
+       XFS_SBS_PQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
+       XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
+        XFS_SBS_DUMMY,
+       XFS_SBS_FIELDCOUNT
+} xfs_sb_field_t;
+
+/*
+ * Mask values, defined based on the xfs_sb_field_t values.
+ * Only define the ones we're using.
+ */
+#define        XFS_SB_MVAL(x)          (1LL << XFS_SBS_ ## x)
+#define        XFS_SB_UUID             XFS_SB_MVAL(UUID)
+#define        XFS_SB_FNAME            XFS_SB_MVAL(FNAME)
+#define        XFS_SB_ROOTINO          XFS_SB_MVAL(ROOTINO)
+#define        XFS_SB_RBMINO           XFS_SB_MVAL(RBMINO)
+#define        XFS_SB_RSUMINO          XFS_SB_MVAL(RSUMINO)
+#define        XFS_SB_VERSIONNUM       XFS_SB_MVAL(VERSIONNUM)
+#define XFS_SB_UQUOTINO                XFS_SB_MVAL(UQUOTINO)
+#define XFS_SB_PQUOTINO                XFS_SB_MVAL(PQUOTINO)
+#define XFS_SB_QFLAGS          XFS_SB_MVAL(QFLAGS)
+#define XFS_SB_SHARED_VN       XFS_SB_MVAL(SHARED_VN)
+#define XFS_SB_UNIT            XFS_SB_MVAL(UNIT)
+#define XFS_SB_WIDTH           XFS_SB_MVAL(WIDTH)
+#define        XFS_SB_NUM_BITS         ((int)XFS_SBS_FIELDCOUNT)
+#define        XFS_SB_ALL_BITS         ((1LL << XFS_SB_NUM_BITS) - 1)
+#define        XFS_SB_MOD_BITS         \
+       (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
+        XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_PQUOTINO | \
+        XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH)
+
+/*
+ * Misc. Flags - warning - these will be cleared by xfs_repair unless
+ * a feature bit is set when the flag is used.
+ */
+#define XFS_SBF_NOFLAGS                0x00    /* no flags set */
+#define XFS_SBF_READONLY       0x01    /* only read-only mounts allowed */
+
+/*
+ * define max. shared version we can interoperate with
+ */
+#define XFS_SB_MAX_SHARED_VN   0
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_NUM)
+int xfs_sb_version_num(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_NUM(sbp) xfs_sb_version_num(sbp)
+#else
+#define        XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_GOOD_VERSION)
+int xfs_sb_good_version(xfs_sb_t *sbp);
+#define        XFS_SB_GOOD_VERSION(sbp)        xfs_sb_good_version(sbp)
+#else
+#define        XFS_SB_GOOD_VERSION_INT(sbp)    \
+       ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
+         ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+         !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS)
+#ifdef __KERNEL__
+#define        XFS_SB_GOOD_VERSION(sbp)        \
+       (XFS_SB_GOOD_VERSION_INT(sbp) && \
+         (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN) ))
+#else
+/*
+ * The 2 extra parens here (( are to unconfuse paren-matching editors
+ * like vi, because XFS_SB_GOOD_VERSION_INT is a partial expression
+ * and the two XFS_SB_GOOD_VERSION's each need 2 more close parens to
+ * complete the expression.
+ */
+#define XFS_SB_GOOD_VERSION(sbp)       \
+       (XFS_SB_GOOD_VERSION_INT(sbp) && \
+         (!((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \
+          (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)) ))
+#endif /* __KERNEL__ */
+#endif
+
+#define        XFS_SB_GOOD_SASH_VERSION(sbp)   \
+       ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
+         ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+         !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS)))
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TONEW)
+unsigned xfs_sb_version_tonew(unsigned v);
+#define        XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v)
+#else
+#define        XFS_SB_VERSION_TONEW(v) \
+       ((((v) == XFS_SB_VERSION_1) ? \
+               0 : \
+               (((v) == XFS_SB_VERSION_2) ? \
+                       XFS_SB_VERSION_ATTRBIT : \
+                       (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \
+        XFS_SB_VERSION_4)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TOOLD)
+unsigned xfs_sb_version_toold(unsigned v);
+#define        XFS_SB_VERSION_TOOLD(v) xfs_sb_version_toold(v)
+#else
+#define        XFS_SB_VERSION_TOOLD(v) \
+       (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
+               0 : \
+               (((v) & XFS_SB_VERSION_NLINKBIT) ? \
+                       XFS_SB_VERSION_3 : \
+                       (((v) & XFS_SB_VERSION_ATTRBIT) ?  \
+                               XFS_SB_VERSION_2 : \
+                               XFS_SB_VERSION_1)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASATTR)
+int xfs_sb_version_hasattr(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_HASATTR(sbp)     xfs_sb_version_hasattr(sbp)
+#else
+#define        XFS_SB_VERSION_HASATTR(sbp)     \
+       (((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
+        ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+         ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDATTR)
+void xfs_sb_version_addattr(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_ADDATTR(sbp)     xfs_sb_version_addattr(sbp)
+#else
+#define        XFS_SB_VERSION_ADDATTR(sbp)     \
+       ((sbp)->sb_versionnum = \
+        (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
+               XFS_SB_VERSION_2 : \
+               ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \
+                       ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \
+                       (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASNLINK)
+int xfs_sb_version_hasnlink(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_HASNLINK(sbp)    xfs_sb_version_hasnlink(sbp)
+#else
+#define        XFS_SB_VERSION_HASNLINK(sbp)    \
+       (((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+         ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDNLINK)
+void xfs_sb_version_addnlink(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_ADDNLINK(sbp)    xfs_sb_version_addnlink(sbp)
+#else
+#define        XFS_SB_VERSION_ADDNLINK(sbp)    \
+       ((sbp)->sb_versionnum = \
+        ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
+               XFS_SB_VERSION_3 : \
+               ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASQUOTA)
+int xfs_sb_version_hasquota(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_HASQUOTA(sbp)    xfs_sb_version_hasquota(sbp)
+#else
+#define        XFS_SB_VERSION_HASQUOTA(sbp)    \
+       ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDQUOTA)
+void xfs_sb_version_addquota(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_ADDQUOTA(sbp)    xfs_sb_version_addquota(sbp)
+#else
+#define        XFS_SB_VERSION_ADDQUOTA(sbp)    \
+       ((sbp)->sb_versionnum = \
+        (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
+               ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
+               (XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
+                XFS_SB_VERSION_QUOTABIT)))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASALIGN)
+int xfs_sb_version_hasalign(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_HASALIGN(sbp)    xfs_sb_version_hasalign(sbp)
+#else
+#define        XFS_SB_VERSION_HASALIGN(sbp)    \
+       ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBALIGN)
+void xfs_sb_version_subalign(xfs_sb_t *sbp);
+#define        XFS_SB_VERSION_SUBALIGN(sbp)    xfs_sb_version_subalign(sbp)
+#else
+#define        XFS_SB_VERSION_SUBALIGN(sbp)    \
+       ((sbp)->sb_versionnum = \
+        XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDALIGN)
+int xfs_sb_version_hasdalign(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_HASDALIGN(sbp)  xfs_sb_version_hasdalign(sbp)
+#else
+#define XFS_SB_VERSION_HASDALIGN(sbp)  \
+       ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDDALIGN)
+int xfs_sb_version_adddalign(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_ADDDALIGN(sbp)  xfs_sb_version_adddalign(sbp)
+#else
+#define XFS_SB_VERSION_ADDDALIGN(sbp)  \
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASSHARED)
+int xfs_sb_version_hasshared(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_HASSHARED(sbp)  xfs_sb_version_hasshared(sbp)
+#else
+#define XFS_SB_VERSION_HASSHARED(sbp)  \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDSHARED)
+int xfs_sb_version_addshared(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_ADDSHARED(sbp)  xfs_sb_version_addshared(sbp)
+#else
+#define XFS_SB_VERSION_ADDSHARED(sbp)  \
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBSHARED)
+int xfs_sb_version_subshared(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_SUBSHARED(sbp)  xfs_sb_version_subshared(sbp)
+#else
+#define XFS_SB_VERSION_SUBSHARED(sbp)  \
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDIRV2)
+int xfs_sb_version_hasdirv2(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_HASDIRV2(sbp)   xfs_sb_version_hasdirv2(sbp)
+#else
+#define XFS_SB_VERSION_HASDIRV2(sbp)   \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASEXTFLGBIT)
+int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)       xfs_sb_version_hasextflgbit(sbp)
+#else
+#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)       \
+        ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
+        ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDEXTFLGBIT)
+int xfs_sb_version_addextflgbit(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)       xfs_sb_version_addextflgbit(sbp)
+#else
+#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)       \
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT))
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBEXTFLGBIT)
+int xfs_sb_version_subextflgbit(xfs_sb_t *sbp);
+#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)       xfs_sb_version_subextflgbit(sbp)
+#else
+#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)       \
+        ((sbp)->sb_versionnum = \
+                ((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT))
+#endif
+
+/*
+ * end of superblock version macros
+ */
+
+#define        XFS_SB_DADDR    ((xfs_daddr_t)0)                /* daddr in filesystem/ag */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_BLOCK)
+xfs_agblock_t xfs_sb_block(struct xfs_mount *mp);
+#define        XFS_SB_BLOCK(mp)        xfs_sb_block(mp)
+#else
+#define        XFS_SB_BLOCK(mp)        XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
+#endif
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_HDR_BLOCK)
+xfs_agblock_t xfs_hdr_block(struct xfs_mount *mp, xfs_daddr_t d);
+#define        XFS_HDR_BLOCK(mp,d)     xfs_hdr_block(mp,d)
+#else
+#define        XFS_HDR_BLOCK(mp,d)     ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp,d)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_FSB)
+xfs_fsblock_t xfs_daddr_to_fsb(struct xfs_mount *mp, xfs_daddr_t d);
+#define        XFS_DADDR_TO_FSB(mp,d)          xfs_daddr_to_fsb(mp,d)
+#else
+#define        XFS_DADDR_TO_FSB(mp,d) \
+       XFS_AGB_TO_FSB(mp, XFS_DADDR_TO_AGNO(mp,d), XFS_DADDR_TO_AGBNO(mp,d))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_DADDR)
+xfs_daddr_t xfs_fsb_to_daddr(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+#define        XFS_FSB_TO_DADDR(mp,fsbno)      xfs_fsb_to_daddr(mp,fsbno)
+#else
+#define        XFS_FSB_TO_DADDR(mp,fsbno) \
+       XFS_AGB_TO_DADDR(mp, XFS_FSB_TO_AGNO(mp,fsbno), \
+                        XFS_FSB_TO_AGBNO(mp,fsbno))
+#endif
+
+/*
+ * File system block to basic block conversions.
+ */
+#define        XFS_FSB_TO_BB(mp,fsbno) ((fsbno) << (mp)->m_blkbb_log)
+#define        XFS_BB_TO_FSB(mp,bb)    \
+       (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log)
+#define        XFS_BB_TO_FSBT(mp,bb)   ((bb) >> (mp)->m_blkbb_log)
+#define        XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1))
+
+/*
+ * File system block to byte conversions.
+ */
+#define        XFS_FSB_TO_B(mp,fsbno)  ((xfs_fsize_t)(fsbno) << \
+                                (mp)->m_sb.sb_blocklog)
+#define        XFS_B_TO_FSB(mp,b)      \
+       ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define        XFS_B_TO_FSBT(mp,b)     (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+#define        XFS_B_FSB_OFFSET(mp,b)  ((b) & (mp)->m_blockmask)     
+
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBP)
+xfs_sb_t *xfs_buf_to_sbp(struct xfs_buf *bp);
+#define        XFS_BUF_TO_SBP(bp)      xfs_buf_to_sbp(bp)
+#else
+#define        XFS_BUF_TO_SBP(bp)      ((xfs_sb_t *)XFS_BUF_PTR(bp))
+#endif
+
+#endif /* __XFS_SB_H__ */
diff --git a/include/xfs_trans.h b/include/xfs_trans.h
new file mode 100644 (file)
index 0000000..49fbc0a
--- /dev/null
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_TRANS_H__
+#define        __XFS_TRANS_H__
+
+/*
+ * This is the structure written in the log at the head of
+ * every transaction. It identifies the type and id of the
+ * transaction, and contains the number of items logged by
+ * the transaction so we know how many to expect during recovery.
+ *
+ * Do not change the below structure without redoing the code in
+ * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
+ */
+typedef struct xfs_trans_header {
+       uint            th_magic;               /* magic number */
+       uint            th_type;                /* transaction type */
+       __int32_t       th_tid;                 /* transaction id (unused) */
+       uint            th_num_items;           /* num items logged by trans */
+} xfs_trans_header_t;
+
+#define        XFS_TRANS_HEADER_MAGIC  0x5452414e      /* TRAN */
+
+/*
+ * Log item types.
+ */
+#define        XFS_LI_5_3_BUF          0x1234  /* v1 bufs, 1-block inode buffers */
+#define        XFS_LI_5_3_INODE        0x1235  /* 1-block inode buffers */
+#define        XFS_LI_EFI              0x1236
+#define        XFS_LI_EFD              0x1237
+#define        XFS_LI_IUNLINK          0x1238
+#define        XFS_LI_6_1_INODE        0x1239  /* 4K non-aligned inode bufs */
+#define        XFS_LI_6_1_BUF          0x123a  /* v1, 4K inode buffers */
+#define        XFS_LI_INODE            0x123b  /* aligned ino chunks, var-size ibufs */
+#define        XFS_LI_BUF              0x123c  /* v2 bufs, variable sized inode bufs */
+#define        XFS_LI_DQUOT            0x123d
+#define        XFS_LI_QUOTAOFF         0x123e
+#define        XFS_LI_RPC              0x123f  /* CXFS RPC return info */
+
+/*
+ * Transaction types.  Used to distinguish types of buffers.
+ */
+#define XFS_TRANS_SETATTR_NOT_SIZE     1
+#define XFS_TRANS_SETATTR_SIZE         2
+#define XFS_TRANS_INACTIVE             3
+#define XFS_TRANS_CREATE               4
+#define XFS_TRANS_CREATE_TRUNC         5
+#define XFS_TRANS_TRUNCATE_FILE                6
+#define XFS_TRANS_REMOVE               7
+#define XFS_TRANS_LINK                 8
+#define XFS_TRANS_RENAME               9
+#define XFS_TRANS_MKDIR                        10
+#define XFS_TRANS_RMDIR                        11
+#define XFS_TRANS_SYMLINK              12
+#define XFS_TRANS_SET_DMATTRS          13
+#define XFS_TRANS_GROWFS               14
+#define XFS_TRANS_STRAT_WRITE          15
+#define XFS_TRANS_DIOSTRAT             16
+#define        XFS_TRANS_WRITE_SYNC            17
+#define        XFS_TRANS_WRITEID               18
+#define        XFS_TRANS_ADDAFORK              19
+#define        XFS_TRANS_ATTRINVAL             20
+#define        XFS_TRANS_ATRUNCATE             21
+#define        XFS_TRANS_ATTR_SET              22
+#define        XFS_TRANS_ATTR_RM               23
+#define        XFS_TRANS_ATTR_FLAG             24
+#define        XFS_TRANS_CLEAR_AGI_BUCKET      25
+#define XFS_TRANS_QM_SBCHANGE          26
+/*
+ * Dummy entries since we use the transaction type to index into the
+ * trans_type[] in xlog_recover_print_trans_head()
+ */
+#define XFS_TRANS_DUMMY1               27
+#define XFS_TRANS_DUMMY2               28
+#define XFS_TRANS_QM_QUOTAOFF          29
+#define XFS_TRANS_QM_DQALLOC           30
+#define XFS_TRANS_QM_SETQLIM           31
+#define XFS_TRANS_QM_DQCLUSTER         32
+#define XFS_TRANS_QM_QINOCREATE                33
+#define XFS_TRANS_QM_QUOTAOFF_END      34
+#define XFS_TRANS_SB_UNIT              35
+#define XFS_TRANS_FSYNC_TS             36
+#define        XFS_TRANS_GROWFSRT_ALLOC        37
+#define        XFS_TRANS_GROWFSRT_ZERO         38
+#define        XFS_TRANS_GROWFSRT_FREE         39
+#define        XFS_TRANS_SWAPEXT               40
+/* new transaction types need to be reflected in xfs_logprint(8) */
+
+
+#ifdef __KERNEL__
+struct xfs_buf;
+struct buftarg;
+struct xfs_efd_log_item;
+struct xfs_efi_log_item;
+struct xfs_inode;
+struct xfs_item_ops;
+struct xfs_log_iovec;
+struct xfs_log_item;
+struct xfs_log_item_desc;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot_acct;
+
+typedef struct xfs_ail_entry {
+       struct xfs_log_item     *ail_forw;      /* AIL forw pointer */
+       struct xfs_log_item     *ail_back;      /* AIL back pointer */
+} xfs_ail_entry_t;
+
+/*
+ * This structure is passed as a parameter to xfs_trans_push_ail()
+ * and is used to track which LSN the waiting processes are
+ * waiting to become unused.
+ */
+typedef struct xfs_ail_ticket {
+       xfs_lsn_t               at_lsn;         /* lsn waiting for */
+       struct xfs_ail_ticket   *at_forw;       /* wait list ptr */
+       struct xfs_ail_ticket   *at_back;       /* wait list ptr */
+       sv_t                    at_sema;        /* wait sema */
+} xfs_ail_ticket_t;
+
+
+typedef struct xfs_log_item {
+       xfs_ail_entry_t                 li_ail;         /* AIL pointers */
+       xfs_lsn_t                       li_lsn;         /* last on-disk lsn */
+       struct xfs_log_item_desc        *li_desc;       /* ptr to current desc*/
+       struct xfs_mount                *li_mountp;     /* ptr to fs mount */
+       uint                            li_type;        /* item type */
+       uint                            li_flags;       /* misc flags */
+       struct xfs_log_item             *li_bio_list;   /* buffer item list */
+       void                            (*li_cb)(struct xfs_buf *,
+                                                struct xfs_log_item *);
+                                                       /* buffer item iodone */
+                                                       /* callback func */
+       struct xfs_item_ops             *li_ops;        /* function list */
+} xfs_log_item_t;
+
+#define        XFS_LI_IN_AIL   0x1
+#define XFS_LI_ABORTED 0x2
+
+typedef struct xfs_item_ops {
+       uint (*iop_size)(xfs_log_item_t *);
+       void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
+       void (*iop_pin)(xfs_log_item_t *);
+       void (*iop_unpin)(xfs_log_item_t *);
+       void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
+       uint (*iop_trylock)(xfs_log_item_t *);
+       void (*iop_unlock)(xfs_log_item_t *);
+       xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
+       void (*iop_push)(xfs_log_item_t *);
+       void (*iop_abort)(xfs_log_item_t *);
+       void (*iop_pushbuf)(xfs_log_item_t *);
+       void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
+} xfs_item_ops_t;
+
+#define        IOP_SIZE(ip)            (*(ip)->li_ops->iop_size)(ip)
+#define        IOP_FORMAT(ip,vp)       (*(ip)->li_ops->iop_format)(ip, vp)
+#define        IOP_PIN(ip)             (*(ip)->li_ops->iop_pin)(ip)
+#define        IOP_UNPIN(ip)           (*(ip)->li_ops->iop_unpin)(ip)
+#define        IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
+#define        IOP_TRYLOCK(ip)         (*(ip)->li_ops->iop_trylock)(ip)
+#define        IOP_UNLOCK(ip)          (*(ip)->li_ops->iop_unlock)(ip)
+#define        IOP_COMMITTED(ip, lsn)  (*(ip)->li_ops->iop_committed)(ip, lsn)
+#define        IOP_PUSH(ip)            (*(ip)->li_ops->iop_push)(ip)
+#define        IOP_ABORT(ip)           (*(ip)->li_ops->iop_abort)(ip)
+#define IOP_PUSHBUF(ip)         (*(ip)->li_ops->iop_pushbuf)(ip)
+#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
+
+/*
+ * Return values for the IOP_TRYLOCK() routines.
+ */
+#define        XFS_ITEM_SUCCESS        0
+#define        XFS_ITEM_PINNED         1
+#define        XFS_ITEM_LOCKED         2
+#define        XFS_ITEM_FLUSHING       3
+#define XFS_ITEM_PUSHBUF       4
+
+#endif /* __KERNEL__ */
+
+/*
+ * This structure is used to track log items associated with
+ * a transaction.  It points to the log item and keeps some
+ * flags to track the state of the log item.  It also tracks
+ * the amount of space needed to log the item it describes
+ * once we get to commit processing (see xfs_trans_commit()).
+ */
+typedef struct xfs_log_item_desc {
+       xfs_log_item_t  *lid_item;
+       ushort          lid_size;
+       unsigned char   lid_flags;
+       unsigned char   lid_index;
+} xfs_log_item_desc_t;
+
+#define        XFS_LID_DIRTY           0x1
+#define        XFS_LID_PINNED          0x2
+#define        XFS_LID_SYNC_UNLOCK     0x4
+
+/*
+ * This structure is used to maintain a chunk list of log_item_desc
+ * structures. The free field is a bitmask indicating which descriptors
+ * in this chunk's array are free.  The unused field is the first value
+ * not used since this chunk was allocated.
+ */
+#define        XFS_LIC_NUM_SLOTS       15
+typedef struct xfs_log_item_chunk {
+       struct xfs_log_item_chunk       *lic_next;
+       ushort                          lic_free;
+       ushort                          lic_unused;
+       xfs_log_item_desc_t             lic_descs[XFS_LIC_NUM_SLOTS];
+} xfs_log_item_chunk_t;
+
+#define        XFS_LIC_MAX_SLOT        (XFS_LIC_NUM_SLOTS - 1)
+#define        XFS_LIC_FREEMASK        ((1 << XFS_LIC_NUM_SLOTS) - 1)
+
+
+/*
+ * Initialize the given chunk.  Set the chunk's free descriptor mask
+ * to indicate that all descriptors are free.  The caller gets to set
+ * lic_unused to the right value (0 matches all free).  The
+ * lic_descs.lid_index values are set up as each desc is allocated.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT)
+void xfs_lic_init(xfs_log_item_chunk_t *cp);
+#define        XFS_LIC_INIT(cp)        xfs_lic_init(cp)
+#else
+#define        XFS_LIC_INIT(cp)        ((cp)->lic_free = XFS_LIC_FREEMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT_SLOT)
+void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot);
+#define        XFS_LIC_INIT_SLOT(cp,slot)      xfs_lic_init_slot(cp, slot)
+#else
+#define        XFS_LIC_INIT_SLOT(cp,slot)      \
+       ((cp)->lic_descs[slot].lid_index = (unsigned char)(slot))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_VACANCY)
+int xfs_lic_vacancy(xfs_log_item_chunk_t *cp);
+#define        XFS_LIC_VACANCY(cp)             xfs_lic_vacancy(cp)
+#else
+#define        XFS_LIC_VACANCY(cp)             (((cp)->lic_free) & XFS_LIC_FREEMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ALL_FREE)
+void xfs_lic_all_free(xfs_log_item_chunk_t *cp);
+#define        XFS_LIC_ALL_FREE(cp)            xfs_lic_all_free(cp)
+#else
+#define        XFS_LIC_ALL_FREE(cp)            ((cp)->lic_free = XFS_LIC_FREEMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ARE_ALL_FREE)
+int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp);
+#define        XFS_LIC_ARE_ALL_FREE(cp)        xfs_lic_are_all_free(cp)
+#else
+#define        XFS_LIC_ARE_ALL_FREE(cp)        (((cp)->lic_free & XFS_LIC_FREEMASK) ==\
+                                       XFS_LIC_FREEMASK)
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ISFREE)
+int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot);
+#define        XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot)
+#else
+#define        XFS_LIC_ISFREE(cp,slot) ((cp)->lic_free & (1 << (slot)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_CLAIM)
+void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot);
+#define        XFS_LIC_CLAIM(cp,slot)          xfs_lic_claim(cp,slot)
+#else
+#define        XFS_LIC_CLAIM(cp,slot)          ((cp)->lic_free &= ~(1 << (slot)))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_RELSE)
+void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot);
+#define        XFS_LIC_RELSE(cp,slot)          xfs_lic_relse(cp,slot)
+#else
+#define        XFS_LIC_RELSE(cp,slot)          ((cp)->lic_free |= 1 << (slot))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_SLOT)
+xfs_log_item_desc_t *xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot);
+#define        XFS_LIC_SLOT(cp,slot)           xfs_lic_slot(cp,slot)
+#else
+#define        XFS_LIC_SLOT(cp,slot)           (&((cp)->lic_descs[slot]))
+#endif
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_SLOT)
+int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp);
+#define        XFS_LIC_DESC_TO_SLOT(dp)        xfs_lic_desc_to_slot(dp)
+#else
+#define        XFS_LIC_DESC_TO_SLOT(dp)        ((uint)((dp)->lid_index))
+#endif
+/*
+ * Calculate the address of a chunk given a descriptor pointer:
+ * dp - dp->lid_index gives the address of the start of the lic_descs array.
+ * From this we subtract the offset of the lic_descs field in a chunk.
+ * All of this yields the address of the chunk, which is
+ * cast to a chunk pointer.
+ */
+#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_CHUNK)
+xfs_log_item_chunk_t *xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp);
+#define        XFS_LIC_DESC_TO_CHUNK(dp)       xfs_lic_desc_to_chunk(dp)
+#else
+#define        XFS_LIC_DESC_TO_CHUNK(dp)       ((xfs_log_item_chunk_t*) \
+                                       (((xfs_caddr_t)((dp) - (dp)->lid_index)) -\
+                                       (xfs_caddr_t)(((xfs_log_item_chunk_t*) \
+                                       0)->lic_descs)))
+#endif
+
+#ifdef __KERNEL__
+/*
+ * This is the type of function which can be given to xfs_trans_callback()
+ * to be called upon the transaction's commit to disk.
+ */
+typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
+
+/*
+ * This is the structure maintained for every active transaction.
+ */
+typedef struct xfs_trans {
+       unsigned int            t_magic;        /* magic number */
+       xfs_log_callback_t      t_logcb;        /* log callback struct */
+       struct xfs_trans        *t_forw;        /* async list pointers */
+       struct xfs_trans        *t_back;        /* async list pointers */
+       unsigned int            t_type;         /* transaction type */
+       unsigned int            t_log_res;      /* amt of log space resvd */
+       unsigned int            t_log_count;    /* count for perm log res */
+       unsigned int            t_blk_res;      /* # of blocks resvd */
+       unsigned int            t_blk_res_used; /* # of resvd blocks used */
+       unsigned int            t_rtx_res;      /* # of rt extents resvd */
+       unsigned int            t_rtx_res_used; /* # of resvd rt extents used */
+       xfs_log_ticket_t        t_ticket;       /* log mgr ticket */
+       sema_t                  t_sema;         /* sema for commit completion */
+       xfs_lsn_t               t_lsn;          /* log seq num of trans commit*/
+       struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
+       struct xfs_dquot_acct   *t_dqinfo;      /* accting info for dquots */
+       xfs_trans_callback_t    t_callback;     /* transaction callback */
+       void                    *t_callarg;     /* callback arg */
+       unsigned int            t_flags;        /* misc flags */
+       long                    t_icount_delta; /* superblock icount change */
+       long                    t_ifree_delta;  /* superblock ifree change */
+       long                    t_fdblocks_delta; /* superblock fdblocks chg */
+       long                    t_res_fdblocks_delta; /* on-disk only chg */
+       long                    t_frextents_delta;/* superblock freextents chg*/
+       long                    t_res_frextents_delta; /* on-disk only chg */
+       long                    t_ag_freeblks_delta; /* debugging counter */
+       long                    t_ag_flist_delta; /* debugging counter */
+       long                    t_ag_btree_delta; /* debugging counter */
+       long                    t_dblocks_delta;/* superblock dblocks change */
+       long                    t_agcount_delta;/* superblock agcount change */
+       long                    t_imaxpct_delta;/* superblock imaxpct change */
+       long                    t_rextsize_delta;/* superblock rextsize chg */
+       long                    t_rbmblocks_delta;/* superblock rbmblocks chg */
+       long                    t_rblocks_delta;/* superblock rblocks change */
+       long                    t_rextents_delta;/* superblocks rextents chg */
+       long                    t_rextslog_delta;/* superblocks rextslog chg */
+       unsigned int            t_items_free;   /* log item descs free */
+       xfs_log_item_chunk_t    t_items;        /* first log item desc chunk */
+       xfs_trans_header_t      t_header;       /* header for in-log trans */
+} xfs_trans_t;
+
+#endif /* __KERNEL__ */
+
+
+#define        XFS_TRANS_MAGIC         0x5452414E      /* 'TRAN' */
+/*
+ * Values for t_flags.
+ */
+#define        XFS_TRANS_DIRTY         0x01    /* something needs to be logged */
+#define        XFS_TRANS_SB_DIRTY      0x02    /* superblock is modified */
+#define        XFS_TRANS_PERM_LOG_RES  0x04    /* xact took a permanent log res */
+#define        XFS_TRANS_SYNC          0x08    /* make commit synchronous */
+#define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
+#define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
+
+/*
+ * Values for call flags parameter.
+ */
+#define        XFS_TRANS_NOSLEEP               0x1
+#define        XFS_TRANS_WAIT                  0x2
+#define        XFS_TRANS_RELEASE_LOG_RES       0x4
+#define        XFS_TRANS_ABORT                 0x8
+
+/*
+ * Field values for xfs_trans_mod_sb.
+ */
+#define        XFS_TRANS_SB_ICOUNT             0x00000001
+#define        XFS_TRANS_SB_IFREE              0x00000002
+#define        XFS_TRANS_SB_FDBLOCKS           0x00000004
+#define        XFS_TRANS_SB_RES_FDBLOCKS       0x00000008
+#define        XFS_TRANS_SB_FREXTENTS          0x00000010
+#define        XFS_TRANS_SB_RES_FREXTENTS      0x00000020
+#define        XFS_TRANS_SB_DBLOCKS            0x00000040
+#define        XFS_TRANS_SB_AGCOUNT            0x00000080
+#define        XFS_TRANS_SB_IMAXPCT            0x00000100
+#define        XFS_TRANS_SB_REXTSIZE           0x00000200
+#define        XFS_TRANS_SB_RBMBLOCKS          0x00000400
+#define        XFS_TRANS_SB_RBLOCKS            0x00000800
+#define        XFS_TRANS_SB_REXTENTS           0x00001000
+#define        XFS_TRANS_SB_REXTSLOG           0x00002000
+
+
+/*
+ * Various log reservation values.
+ * These are based on the size of the file system block
+ * because that is what most transactions manipulate.
+ * Each adds in an additional 128 bytes per item logged to
+ * try to account for the overhead of the transaction mechanism.
+ *
+ * Note:
+ * Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish()
+ * call.  This is because the number in the worst case is quite high
+ * and quite unusual.  In order to fix this we need to change
+ * xfs_bmap_finish() to free extents in only a single AG at a time.
+ * This will require changes to the EFI code as well, however, so that
+ * the EFI for the extents not freed is logged again in each transaction.
+ * See bug 261917.
+ */
+
+/*
+ * Per-extent log reservation for the allocation btree changes
+ * involved in freeing or allocating an extent.
+ * 2 trees * (2 blocks/level * max depth - 1) * block size
+ *
+ * Both macros scale the per-extent figure by nx (the users in this file
+ * pass the number of extents, 1 to 4):
+ *    XFS_ALLOCFREE_LOG_RES   - the block count converted with XFS_FSB_TO_B
+ *    XFS_ALLOCFREE_LOG_COUNT - the matching block count, which the users
+ *                              multiply by 128 for the per-item overhead
+ */
+#define        XFS_ALLOCFREE_LOG_RES(mp,nx) \
+       ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+#define        XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
+       ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+
+/*
+ * Per-directory log reservation for any directory change.
+ * dir blocks: (1 btree block per level + data block + free block) * dblock size
+ * bmap btree: (levels + 2) * max depth * block size
+ * v2 directory blocks can be fragmented below the dirblksize down to the fsb
+ * size, so account for that in the DAENTER macros.
+ */
+#define        XFS_DIROP_LOG_RES(mp)   \
+       (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \
+        (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))
+#define        XFS_DIROP_LOG_COUNT(mp) \
+       (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
+        XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents.  This gives:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define XFS_CALC_WRITE_LOG_RES(mp) \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
+         (2 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+         (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
+        ((2 * (mp)->m_sb.sb_sectsize) + \
+         (2 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define        XFS_WRITE_LOG_RES(mp)   ((mp)->m_reservations.tr_write)
+
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_ITRUNCATE_LOG_RES(mp) \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
+         (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
+        ((4 * (mp)->m_sb.sb_sectsize) + \
+         (4 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
+         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
+
+#define        XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
+
+/*
+ * In renaming a file we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *     of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_RENAME_LOG_RES(mp) \
+       (MAX( \
+        ((4 * (mp)->m_sb.sb_inodesize) + \
+         (2 * XFS_DIROP_LOG_RES(mp)) + \
+         (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
+        ((3 * (mp)->m_sb.sb_sectsize) + \
+         (3 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 3) + \
+         (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
+
+#define        XFS_RENAME_LOG_RES(mp)  ((mp)->m_reservations.tr_rename)
+
+/*
+ * For creating a link to an inode:
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_LINK_LOG_RES(mp) \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         (mp)->m_sb.sb_inodesize + \
+         XFS_DIROP_LOG_RES(mp) + \
+         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
+        ((mp)->m_sb.sb_sectsize + \
+         (mp)->m_sb.sb_sectsize + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+         (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define        XFS_LINK_LOG_RES(mp)    ((mp)->m_reservations.tr_link)
+
+/*
+ * For removing a directory entry we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_REMOVE_LOG_RES(mp)     \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         (mp)->m_sb.sb_inodesize + \
+         XFS_DIROP_LOG_RES(mp) + \
+         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
+        ((2 * (mp)->m_sb.sb_sectsize) + \
+         (2 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define        XFS_REMOVE_LOG_RES(mp)  ((mp)->m_reservations.tr_remove)
+
+/*
+ * For symlink we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: 1 block
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the blocks for the symlink: 1 KB
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_SYMLINK_LOG_RES(mp)            \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         (mp)->m_sb.sb_inodesize + \
+         XFS_FSB_TO_B(mp, 1) + \
+         XFS_DIROP_LOG_RES(mp) + \
+         1024 + \
+         (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
+        (2 * (mp)->m_sb.sb_sectsize + \
+         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
+         XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define        XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
+
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_CREATE_LOG_RES(mp)             \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         (mp)->m_sb.sb_inodesize + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_FSB_TO_B(mp, 1) + \
+         XFS_DIROP_LOG_RES(mp) + \
+         (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
+        (3 * (mp)->m_sb.sb_sectsize + \
+         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
+         XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \
+         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
+          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
+
+#define        XFS_CREATE_LOG_RES(mp)  ((mp)->m_reservations.tr_create)
+
+/*
+ * Making a new directory is the same as creating a new file.
+ */
+#define        XFS_CALC_MKDIR_LOG_RES(mp)      XFS_CALC_CREATE_LOG_RES(mp)
+
+#define        XFS_MKDIR_LOG_RES(mp)   ((mp)->m_reservations.tr_mkdir)
+
+/*
+ * In freeing an inode we can modify:
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+#define        XFS_CALC_IFREE_LOG_RES(mp) \
+       ((mp)->m_sb.sb_inodesize + \
+        (mp)->m_sb.sb_sectsize + \
+        (mp)->m_sb.sb_sectsize + \
+        XFS_FSB_TO_B((mp), 1) + \
+        MAX(XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
+        (128 * 5))
+
+#define        XFS_IFREE_LOG_RES(mp)   ((mp)->m_reservations.tr_ifree)
+
+/*
+ * When only changing the inode we log the inode and possibly the superblock.
+ * We also add a bit of slop for the transaction stuff.
+ */
+#define        XFS_CALC_ICHANGE_LOG_RES(mp)    ((mp)->m_sb.sb_inodesize + \
+                                        (mp)->m_sb.sb_sectsize + 512)
+
+#define        XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
+
+/*
+ * Growing the data section of the filesystem.
+ *     superblock
+ *     agi and agf
+ *     allocation btrees
+ */
+#define        XFS_CALC_GROWDATA_LOG_RES(mp) \
+       ((mp)->m_sb.sb_sectsize * 3 + \
+        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+        (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define        XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *     superblock: sector size
+ *     agf of the ag from which the extent is allocated: sector size
+ *     bmap btree for bitmap/summary inode: max depth * blocksize
+ *     bitmap/summary inode: inode size
+ *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
+#define        XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
+       (2 * (mp)->m_sb.sb_sectsize + \
+        XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
+        (mp)->m_sb.sb_inodesize + \
+        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+        (128 * \
+         (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
+          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define        XFS_GROWRTALLOC_LOG_RES(mp)     ((mp)->m_reservations.tr_growrtalloc)
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *     one bitmap/summary block: blocksize
+ */
+#define        XFS_CALC_GROWRTZERO_LOG_RES(mp) \
+       ((mp)->m_sb.sb_blocksize + 128)
+
+#define        XFS_GROWRTZERO_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtzero)
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ *     superblock: sector size
+ *     bitmap inode: inode size
+ *     summary inode: inode size
+ *     one bitmap block: blocksize
+ *     summary blocks: new summary size
+ */
+#define        XFS_CALC_GROWRTFREE_LOG_RES(mp) \
+       ((mp)->m_sb.sb_sectsize + \
+        2 * (mp)->m_sb.sb_inodesize + \
+        (mp)->m_sb.sb_blocksize + \
+        (mp)->m_rsumsize + \
+        (128 * 5))
+
+#define        XFS_GROWRTFREE_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtfree)
+
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *     inode
+ */
+#define        XFS_CALC_SWRITE_LOG_RES(mp) \
+       ((mp)->m_sb.sb_inodesize + 128)
+
+#define        XFS_SWRITE_LOG_RES(mp)  ((mp)->m_reservations.tr_swrite)
+
+/*
+ * Logging the inode timestamps on an fsync -- same as SWRITE
+ * as long as SWRITE logs the entire inode core
+ */
+#define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
+
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *     inode
+ *
+ * NOTE(review): XFS_WRITEID_LOG_RES reads m_reservations.tr_swrite rather
+ * than a writeid-specific field.  The result is the same only because
+ * XFS_CALC_WRITEID_LOG_RES and XFS_CALC_SWRITE_LOG_RES expand to the same
+ * expression; confirm before changing either formula.
+ */
+#define        XFS_CALC_WRITEID_LOG_RES(mp) \
+       ((mp)->m_sb.sb_inodesize + 128)
+
+#define        XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
+
+/*
+ * Converting the inode from non-attributed to attributed.
+ *     the inode being converted: inode size
+ *     agf block and superblock (for block allocation)
+ *     the new block (directory sized)
+ *     bmap blocks for the new directory block
+ *     allocation btrees
+ */
+#define        XFS_CALC_ADDAFORK_LOG_RES(mp)   \
+       ((mp)->m_sb.sb_inodesize + \
+        (mp)->m_sb.sb_sectsize * 2 + \
+        (mp)->m_dirblksize + \
+        (XFS_DIR_IS_V1(mp) ? 0 : \
+           XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \
+        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
+        (128 * (4 + \
+                (XFS_DIR_IS_V1(mp) ? 0 : \
+                        XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
+                XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
+
+#define        XFS_ADDAFORK_LOG_RES(mp)        ((mp)->m_reservations.tr_addafork)
+
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+#define        XFS_CALC_ATTRINVAL_LOG_RES(mp)  \
+       (MAX( \
+        ((mp)->m_sb.sb_inodesize + \
+         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
+         (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
+        ((4 * (mp)->m_sb.sb_sectsize) + \
+         (4 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
+         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
+
+#define        XFS_ATTRINVAL_LOG_RES(mp)       ((mp)->m_reservations.tr_attrinval)
+
+/*
+ * Setting an attribute.
+ *     the inode getting the attribute
+ *     the superblock for allocations
+ *     the agfs extents are allocated from
+ *     the attribute btree * max depth
+ *     the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ *
+ * XFS_CALC_ATTRSET_LOG_RES(mp) is the size-independent part.
+ * XFS_ATTRSET_LOG_RES(mp, ext) adds to the stored tr_attrset value, for each
+ * unit of ext: one sector, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK) blocks, and
+ * 128 bytes of overhead per item.  ext is presumably the number of extents
+ * the attribute needs (confirm against the callers); it is parenthesized in
+ * the expansion so that an expression argument such as (a + b) is scaled as
+ * a whole.
+ */
+#define        XFS_CALC_ATTRSET_LOG_RES(mp)    \
+       ((mp)->m_sb.sb_inodesize + \
+        (mp)->m_sb.sb_sectsize + \
+         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
+          (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
+
+#define        XFS_ATTRSET_LOG_RES(mp, ext)    \
+       ((mp)->m_reservations.tr_attrset + \
+        ((ext) * (mp)->m_sb.sb_sectsize) + \
+        ((ext) * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
+        (128 * ((ext) + ((ext) * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
+
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ *
+ * NOTE(review): in the first MAX() operand the bmap btree byte size uses
+ * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK) but the 128-byte per-item count uses
+ * XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK).  Confirm whether the fork mismatch
+ * is intentional before changing it, since it alters the reservation size.
+ */
+#define        XFS_CALC_ATTRRM_LOG_RES(mp)     \
+       (MAX( \
+         ((mp)->m_sb.sb_inodesize + \
+         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
+         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
+         (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
+        ((2 * (mp)->m_sb.sb_sectsize) + \
+         (2 * (mp)->m_sb.sb_sectsize) + \
+         (mp)->m_sb.sb_sectsize + \
+         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
+         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
+
+#define        XFS_ATTRRM_LOG_RES(mp)  ((mp)->m_reservations.tr_attrrm)
+
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
+#define        XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
+       ((mp)->m_sb.sb_sectsize + 128)
+
+#define        XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
+
+
+/*
+ * Various log count values.
+ */
+#define        XFS_DEFAULT_LOG_COUNT           1
+#define        XFS_DEFAULT_PERM_LOG_COUNT      2
+#define        XFS_ITRUNCATE_LOG_COUNT         2
+#define        XFS_CREATE_LOG_COUNT            2
+#define        XFS_MKDIR_LOG_COUNT             3
+#define        XFS_SYMLINK_LOG_COUNT           3
+#define        XFS_REMOVE_LOG_COUNT            2
+#define        XFS_LINK_LOG_COUNT              2
+#define        XFS_RENAME_LOG_COUNT            2
+#define        XFS_WRITE_LOG_COUNT             2
+#define        XFS_ADDAFORK_LOG_COUNT          2
+#define        XFS_ATTRINVAL_LOG_COUNT         1
+#define        XFS_ATTRSET_LOG_COUNT           3
+#define        XFS_ATTRRM_LOG_COUNT            3
+
+/*
+ * Here we centralize the specification of XFS meta-data buffer
+ * reference count values.  This determines how hard the buffer
+ * cache tries to hold onto the buffer.
+ */
+#define        XFS_AGF_REF             4
+#define        XFS_AGI_REF             4
+#define        XFS_AGFL_REF            3
+#define        XFS_INO_BTREE_REF       3
+#define        XFS_ALLOC_BTREE_REF     2
+#define        XFS_BMAP_BTREE_REF      2
+#define        XFS_DIR_BTREE_REF       2
+#define        XFS_ATTR_BTREE_REF      1
+#define        XFS_INO_REF             1
+#define        XFS_DQUOT_REF           1
+
+#ifdef __KERNEL__
+/*
+ * XFS transaction mechanism exported interfaces that are
+ * actually macros.
+ */
+#define        xfs_trans_get_log_res(tp)       ((tp)->t_log_res)
+#define        xfs_trans_get_log_count(tp)     ((tp)->t_log_count)
+#define        xfs_trans_get_block_res(tp)     ((tp)->t_blk_res)
+#define        xfs_trans_set_sync(tp)          ((tp)->t_flags |= XFS_TRANS_SYNC)
+
+#ifdef DEBUG
+/*
+ * DEBUG-only accounting: add d to the transaction's t_ag_*_delta fields.
+ * The (long) cast is written without parentheses around d, so for a
+ * compound argument it applies to the first operand only; pass a simple
+ * or already parenthesized value.
+ * Without DEBUG the macros expand to nothing and d is never evaluated.
+ */
+#define        xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (long)d)
+#define        xfs_trans_agflist_delta(tp, d)  ((tp)->t_ag_flist_delta += (long)d)
+#define        xfs_trans_agbtree_delta(tp, d)  ((tp)->t_ag_btree_delta += (long)d)
+#else
+#define        xfs_trans_agblocks_delta(tp, d)
+#define        xfs_trans_agflist_delta(tp, d)
+#define        xfs_trans_agbtree_delta(tp, d)
+#endif
+
+/*
+ * XFS transaction mechanism exported interfaces.
+ */
+void           xfs_trans_init(struct xfs_mount *);
+xfs_trans_t    *xfs_trans_alloc(struct xfs_mount *, uint);
+xfs_trans_t    *xfs_trans_dup(xfs_trans_t *);
+int            xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
+                                 uint, uint);
+void           xfs_trans_callback(xfs_trans_t *,
+                                  void (*)(xfs_trans_t *, void *), void *);
+void           xfs_trans_mod_sb(xfs_trans_t *, uint, long);
+struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct buftarg *, xfs_daddr_t,
+                                  int, uint);
+int            xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
+                                  struct buftarg *, xfs_daddr_t, int, uint,
+                                  struct xfs_buf **);
+struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+
+void           xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
+void           xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
+int            xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
+                              xfs_ino_t , uint, struct xfs_inode **);
+void           xfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint);
+void           xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
+void           xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
+void           xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *);
+void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
+void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
+struct xfs_efi_log_item        *xfs_trans_get_efi(xfs_trans_t *, uint);
+void           xfs_efi_release(struct xfs_efi_log_item *, uint);
+void           xfs_trans_log_efi_extent(xfs_trans_t *,
+                                        struct xfs_efi_log_item *,
+                                        xfs_fsblock_t,
+                                        xfs_extlen_t);
+struct xfs_efd_log_item        *xfs_trans_get_efd(xfs_trans_t *,
+                                 struct xfs_efi_log_item *,
+                                 uint);
+void           xfs_trans_log_efd_extent(xfs_trans_t *,
+                                        struct xfs_efd_log_item *,
+                                        xfs_fsblock_t,
+                                        xfs_extlen_t);
+void           xfs_trans_log_create_rpc(xfs_trans_t *, int, xfs_ino_t);
+void           xfs_trans_log_setattr_rpc(xfs_trans_t *, int); 
+int            xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
+void           xfs_trans_commit_async(struct xfs_mount *);
+void           xfs_trans_cancel(xfs_trans_t *, int);
+void           xfs_trans_ail_init(struct xfs_mount *);
+xfs_lsn_t      xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
+xfs_lsn_t      xfs_trans_tail_ail(struct xfs_mount *);
+void           xfs_trans_unlocked_item(struct xfs_mount *,
+                                       xfs_log_item_t *);
+
+/*
+ * Not necessarily exported, but used outside a single file.
+ */
+int            xfs_trans_lsn_danger(struct xfs_mount *, xfs_lsn_t);
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_TRANS_H__ */
diff --git a/include/xfs_trans_space.h b/include/xfs_trans_space.h
new file mode 100644 (file)
index 0000000..c377a44
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_TRANS_SPACE_H__
+#define __XFS_TRANS_SPACE_H__
+
+/*
+ * Components of space reservations.
+ */
+#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)    \
+                (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
+#define        XFS_EXTENTADD_SPACE_RES(mp,w)   (XFS_BM_MAXLEVELS(mp,w) - 1)
+/*
+ * Rounds b up to a whole number of XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)
+ * units and charges XFS_EXTENTADD_SPACE_RES(mp,w) for each unit.
+ * b is parenthesized so that an expression argument keeps its grouping.
+ */
+#define XFS_NEXTENTADD_SPACE_RES(mp,b,w)\
+        ((((b) + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
+          XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
+          XFS_EXTENTADD_SPACE_RES(mp,w))
+#define        XFS_DAENTER_1B(mp,w)    ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
+#define        XFS_DAENTER_DBS(mp,w)   \
+       (XFS_DA_NODE_MAXDEPTH + \
+        ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0))
+#define        XFS_DAENTER_BLOCKS(mp,w)        \
+       (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w))
+#define        XFS_DAENTER_BMAP1B(mp,w)        \
+       XFS_NEXTENTADD_SPACE_RES(mp, XFS_DAENTER_1B(mp, w), w)
+#define        XFS_DAENTER_BMAPS(mp,w)         \
+       (XFS_DAENTER_DBS(mp,w) * XFS_DAENTER_BMAP1B(mp,w))
+#define        XFS_DAENTER_SPACE_RES(mp,w)     \
+       (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w))
+#define        XFS_DAREMOVE_SPACE_RES(mp,w)    XFS_DAENTER_BMAPS(mp,w)
+#define        XFS_DIRENTER_MAX_SPLIT(mp,nl)   \
+       (((mp)->m_sb.sb_blocksize == 512 && \
+         XFS_DIR_IS_V1(mp) && \
+         (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 2 : 1)
+#define        XFS_DIRENTER_SPACE_RES(mp,nl)   \
+       (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \
+        XFS_DIRENTER_MAX_SPLIT(mp,nl))
+#define        XFS_DIRREMOVE_SPACE_RES(mp)     \
+       XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
+#define        XFS_IALLOC_SPACE_RES(mp)        \
+       (XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1)
+
+/*
+ * Space reservation values for various transactions.
+ */
+#define        XFS_ADDAFORK_SPACE_RES(mp)      \
+       ((mp)->m_dirblkfsbs + \
+        (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK)))
+#define        XFS_ATTRRM_SPACE_RES(mp)        \
+       XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
+/* This macro is not used - see inline code in xfs_attr_set */
+#define        XFS_ATTRSET_SPACE_RES(mp, v)    \
+       (XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
+#define        XFS_CREATE_SPACE_RES(mp,nl)     \
+       (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define        XFS_DIOSTRAT_SPACE_RES(mp, v)   \
+       (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
+#define        XFS_GROWFS_SPACE_RES(mp)        \
+       (2 * XFS_AG_MAXLEVELS(mp))
+#define        XFS_GROWFSRT_SPACE_RES(mp,b)    \
+       ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
+#define        XFS_LINK_SPACE_RES(mp,nl)       \
+       XFS_DIRENTER_SPACE_RES(mp,nl)
+#define        XFS_MKDIR_SPACE_RES(mp,nl)      \
+       (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define        XFS_QM_DQALLOC_SPACE_RES(mp)    \
+       (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
+        XFS_DQUOT_CLUSTER_SIZE_FSB)
+#define        XFS_QM_QINOCREATE_SPACE_RES(mp) \
+       XFS_IALLOC_SPACE_RES(mp)
+#define        XFS_REMOVE_SPACE_RES(mp)        \
+       XFS_DIRREMOVE_SPACE_RES(mp)
+#define        XFS_RENAME_SPACE_RES(mp,nl)     \
+       (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
+#define        XFS_SYMLINK_SPACE_RES(mp,nl,b)  \
+       (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
+
+#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/include/xfs_types.h b/include/xfs_types.h
new file mode 100644 (file)
index 0000000..83d0719
--- /dev/null
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_TYPES_H__
+#define        __XFS_TYPES_H__
+
+/*
+ * Some types are conditional based on the selected configuration.
+ * Set XFS_BIG_FILES=1 or 0 and XFS_BIG_FILESYSTEMS=1 or 0 depending
+ * on the desired configuration.
+ * XFS_BIG_FILES needs pgno_t to be 64 bits (64-bit kernels).
+ * XFS_BIG_FILESYSTEMS needs daddr_t to be 64 bits (N32 and 64-bit kernels).
+ *
+ * Expect these to be set from klocaldefs, or from the machine-type
+ * defs files for the normal case.
+ */
+
+#define        XFS_BIG_FILES           1
+#define        XFS_BIG_FILESYSTEMS     1
+
+typedef __uint32_t     xfs_agblock_t;  /* blockno in alloc. group */
+typedef        __uint32_t      xfs_extlen_t;   /* extent length in blocks */
+typedef        __uint32_t      xfs_agnumber_t; /* allocation group number */
+typedef __int32_t      xfs_extnum_t;   /* # of extents in a file */
+typedef __int16_t      xfs_aextnum_t;  /* # extents in an attribute fork */
+typedef        __int64_t       xfs_fsize_t;    /* bytes in a file */
+typedef __uint64_t     xfs_ufsize_t;   /* unsigned bytes in a file */
+
+typedef        __int32_t       xfs_suminfo_t;  /* type of bitmap summary info */
+typedef        __int32_t       xfs_rtword_t;   /* word type for bitmap manipulations */
+
+typedef        __int64_t       xfs_lsn_t;      /* log sequence number */
+typedef        __int32_t       xfs_tid_t;      /* transaction identifier */
+
+typedef        __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
+typedef        __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
+
+typedef __uint16_t     xfs_prid_t;     /* prid_t truncated to 16bits in XFS */
+
+/*
+ * These types are 64 bits on disk but are either 32 or 64 bits in memory.
+ * Disk based types:
+ */
+typedef __uint64_t     xfs_dfsbno_t;   /* blockno in filesystem (agno|agbno) */
+typedef __uint64_t     xfs_drfsbno_t;  /* blockno in filesystem (raw) */
+typedef        __uint64_t      xfs_drtbno_t;   /* extent (block) in realtime area */
+typedef        __uint64_t      xfs_dfiloff_t;  /* block number in a file */
+typedef        __uint64_t      xfs_dfilblks_t; /* number of blocks in a file */
+
+/*
+ * Memory based types are conditional.
+ */
+#if XFS_BIG_FILESYSTEMS
+typedef        __uint64_t      xfs_fsblock_t;  /* blockno in filesystem (agno|agbno) */
+typedef __uint64_t     xfs_rfsblock_t; /* blockno in filesystem (raw) */
+typedef __uint64_t     xfs_rtblock_t;  /* extent (block) in realtime area */
+typedef        __int64_t       xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+#else
+typedef        __uint32_t      xfs_fsblock_t;  /* blockno in filesystem (agno|agbno) */
+typedef __uint32_t     xfs_rfsblock_t; /* blockno in filesystem (raw) */
+typedef __uint32_t     xfs_rtblock_t;  /* extent (block) in realtime area */
+typedef        __int32_t       xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+#endif
+#if XFS_BIG_FILES
+typedef        __uint64_t      xfs_fileoff_t;  /* block number in a file */
+typedef        __int64_t       xfs_sfiloff_t;  /* signed block number in a file */
+typedef        __uint64_t      xfs_filblks_t;  /* number of blocks in a file */
+#else
+typedef        __uint32_t      xfs_fileoff_t;  /* block number in a file */
+typedef        __int32_t       xfs_sfiloff_t;  /* signed block number in a file */
+typedef        __uint32_t      xfs_filblks_t;  /* number of blocks in a file */
+#endif
+
+typedef __uint8_t       xfs_arch_t;     /* architecture of an xfs fs */
+
+/*
+ * Null values for the types.
+ */
+#define        NULLDFSBNO      ((xfs_dfsbno_t)-1)
+#define        NULLDRFSBNO     ((xfs_drfsbno_t)-1)
+#define        NULLDRTBNO      ((xfs_drtbno_t)-1)
+#define        NULLDFILOFF     ((xfs_dfiloff_t)-1)
+
+#define        NULLFSBLOCK     ((xfs_fsblock_t)-1)
+#define        NULLRFSBLOCK    ((xfs_rfsblock_t)-1)
+#define        NULLRTBLOCK     ((xfs_rtblock_t)-1)
+#define        NULLFILEOFF     ((xfs_fileoff_t)-1)
+
+#define        NULLAGBLOCK     ((xfs_agblock_t)-1)
+#define        NULLAGNUMBER    ((xfs_agnumber_t)-1)
+#define        NULLEXTNUM      ((xfs_extnum_t)-1)
+
+#define NULLCOMMITLSN  ((xfs_lsn_t)-1)
+
+/*
+ * Max values for extlen, extnum, aextnum.
+ */
+#define        MAXEXTLEN       ((xfs_extlen_t)0x001fffff)      /* 21 bits */
+#define        MAXEXTNUM       ((xfs_extnum_t)0x7fffffff)      /* signed int */
+#define        MAXAEXTNUM      ((xfs_aextnum_t)0x7fff)         /* signed short */
+
+/*
+ * MAXNAMELEN is the length (including the terminating null) of
+ * the longest permissible file (component) name.
+ */
+#define MAXNAMELEN     256
+
+typedef enum {
+       XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi
+} xfs_lookup_t;
+
+typedef enum {
+       XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
+       XFS_BTNUM_MAX
+} xfs_btnum_t;
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC      4
+       __uint32_t              xs_allocx;
+       __uint32_t              xs_allocb;
+       __uint32_t              xs_freex;
+       __uint32_t              xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
+       __uint32_t              xs_abt_lookup;
+       __uint32_t              xs_abt_compare;
+       __uint32_t              xs_abt_insrec;
+       __uint32_t              xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
+       __uint32_t              xs_blk_mapr;
+       __uint32_t              xs_blk_mapw;
+       __uint32_t              xs_blk_unmap;
+       __uint32_t              xs_add_exlist;
+       __uint32_t              xs_del_exlist;
+       __uint32_t              xs_look_exlist;
+       __uint32_t              xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
+       __uint32_t              xs_bmbt_lookup;
+       __uint32_t              xs_bmbt_compare;
+       __uint32_t              xs_bmbt_insrec;
+       __uint32_t              xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+       __uint32_t              xs_dir_lookup;
+       __uint32_t              xs_dir_create;
+       __uint32_t              xs_dir_remove;
+       __uint32_t              xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
+       __uint32_t              xs_trans_sync;
+       __uint32_t              xs_trans_async;
+       __uint32_t              xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
+       __uint32_t              xs_ig_attempts;
+       __uint32_t              xs_ig_found;
+       __uint32_t              xs_ig_frecycle;
+       __uint32_t              xs_ig_missed;
+       __uint32_t              xs_ig_dup;
+       __uint32_t              xs_ig_reclaims;
+       __uint32_t              xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
+       __uint32_t              xs_log_writes;
+       __uint32_t              xs_log_blocks;
+       __uint32_t              xs_log_noiclogs;
+       __uint32_t              xs_log_force;
+       __uint32_t              xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
+       __uint32_t              xs_try_logspace;
+       __uint32_t              xs_sleep_logspace;
+       __uint32_t              xs_push_ail;
+       __uint32_t              xs_push_ail_success;
+       __uint32_t              xs_push_ail_pushbuf;
+       __uint32_t              xs_push_ail_pinned;
+       __uint32_t              xs_push_ail_locked;
+       __uint32_t              xs_push_ail_flushing;
+       __uint32_t              xs_push_ail_restarts;
+       __uint32_t              xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
+       __uint32_t              xs_xstrat_quick;
+       __uint32_t              xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
+       __uint32_t              xs_write_calls;
+       __uint32_t              xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
+       __uint32_t              xs_attr_get;
+       __uint32_t              xs_attr_set;
+       __uint32_t              xs_attr_remove;
+       __uint32_t              xs_attr_list;
+# define XFSSTAT_END_QUOTA_OPS         (XFSSTAT_END_ATTRIBUTE_OPS+8)
+       __uint32_t              xs_qm_dqreclaims;
+       __uint32_t              xs_qm_dqreclaim_misses;
+       __uint32_t              xs_qm_dquot_dups;
+       __uint32_t              xs_qm_dqcachemisses;
+       __uint32_t              xs_qm_dqcachehits;
+       __uint32_t              xs_qm_dqwants;
+       __uint32_t              xs_qm_dqshake_reclaims;
+       __uint32_t              xs_qm_dqinact_reclaims;
+# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_QUOTA_OPS+3)
+       __uint32_t              xs_iflush_count;
+       __uint32_t              xs_icluster_flushcnt;
+       __uint32_t              xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
+       __uint32_t              vn_active;      /* # vnodes not on free lists */
+       __uint32_t              vn_alloc;       /* # times vn_alloc called */
+       __uint32_t              vn_get;         /* # times vn_get called */
+       __uint32_t              vn_hold;        /* # times vn_hold called */
+       __uint32_t              vn_rele;        /* # times vn_rele called */
+       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
+       __uint32_t              vn_remove;      /* # times vn_remove called */
+       __uint32_t              vn_free;        /* # times vn_free called */
+       struct xfsstats_xpc {
+               __uint64_t      xs_xstrat_bytes;
+               __uint64_t      xs_write_bytes;
+               __uint64_t      xs_read_bytes;
+       } xpc;
+} xfsstats;
+
+/*
+ * Statistics update helpers.  "count" names a member of the global
+ * xfsstats structure (or of its xfsstats.xpc sub-structure for the
+ * 64-bit variants).  The member name simply follows the '.' operator:
+ * pasting it with ## would have to form one token out of '.' and an
+ * identifier, which is not a valid preprocessing token.
+ */
+# define XFS_STATS_INC(count)          ( xfsstats.count ++ )
+# define XFS_STATS_DEC(count)          ( xfsstats.count -- )
+# define XFS_STATS_ADD(count, inc)     ( xfsstats.count += (inc) )
+# define XFS_STATS64_INC(count)                ( xfsstats.xpc.count ++ )
+# define XFS_STATS64_ADD(count, inc)   ( xfsstats.xpc.count += (inc) )
+#else  /* !CONFIG_PROC_FS */
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+# define XFS_STATS64_INC(count)
+# define XFS_STATS64_ADD(count, inc)
+#endif /* !CONFIG_PROC_FS */
+
+
+#ifdef __KERNEL__
+
+/* juggle IRIX device numbers - still used in ondisk structures */
+
+/*
+ * NOTE(review): IRIX_DEV_MAXMAJ masks 9 bits although IRIX_DEV_BITSMAJOR
+ * is 14 - confirm this is intended before relying on either value.
+ */
+#define IRIX_DEV_BITSMAJOR      14
+#define IRIX_DEV_BITSMINOR      18
+#define IRIX_DEV_MAXMAJ         0x1ff
+#define IRIX_DEV_MAXMIN         0x3ffff
+/* major number: the bits above the minor field, masked by IRIX_DEV_MAXMAJ */
+#define IRIX_DEV_MAJOR(dev)     ((int)(((unsigned)(dev)>>IRIX_DEV_BITSMINOR) \
+                                    & IRIX_DEV_MAXMAJ))
+/* minor number: the low bits selected by IRIX_DEV_MAXMIN */
+#define IRIX_DEV_MINOR(dev)     ((int)((dev)&IRIX_DEV_MAXMIN))
+/*
+ * Combine a major and minor number.  Both arguments are parenthesized so
+ * that an expression argument (e.g. "a | b") is masked as a whole.
+ */
+#define IRIX_MKDEV(major,minor) ((xfs_dev_t)(((major)<<IRIX_DEV_BITSMINOR) \
+                                    | ((minor)&IRIX_DEV_MAXMIN)))
+
+/* split an IRIX device number and re-encode it via MKDEV / as (major<<8)|minor */
+#define IRIX_DEV_TO_KDEVT(dev)  MKDEV(IRIX_DEV_MAJOR(dev),IRIX_DEV_MINOR(dev))
+#define IRIX_DEV_TO_DEVT(dev)   ((IRIX_DEV_MAJOR(dev)<<8)|IRIX_DEV_MINOR(dev))
+
+/* __psint_t is the same size as a pointer */
+#if (BITS_PER_LONG == 32)
+typedef __int32_t __psint_t;
+typedef __uint32_t __psunsigned_t;
+#elif (BITS_PER_LONG == 64)
+typedef __int64_t __psint_t;
+typedef __uint64_t __psunsigned_t;
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+
+
+/*
+ * struct for passing owner/requestor id
+ */
+typedef struct flid {
+#ifdef CELL_CAPABLE
+        pid_t   fl_pid;
+        sysid_t fl_sysid;
+#endif
+} flid_t;
+
+#endif /* __KERNEL__ */
+
+#endif /* __XFS_TYPES_H__ */
diff --git a/include/xqm.h b/include/xqm.h
new file mode 100644 (file)
index 0000000..a8cc558
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XQM_H__
+#define __XQM_H__
+
+#include <linux/types.h>
+
+#define XQM_CMD(cmd)   ( ('X'<<8)+(cmd) )
+#define IS_XQM_CMD(cmd)        ( ((int)(cmd)>>8) == 'X' )
+
+/*
+ * Disk quota - quotactl(2) commands for XFS Quota Manager (XQM).
+ */
+#define Q_XQUOTAON   XQM_CMD(0x1)  /* enable quota accounting/enforcement */
+#define Q_XQUOTAOFF  XQM_CMD(0x2)  /* disable quota accounting/enforcement */
+#define Q_XGETQUOTA  XQM_CMD(0x3)  /* get disk limits & usage */
+#define Q_XSETQLIM   XQM_CMD(0x4)  /* set disk limits only */
+#define Q_XGETQSTAT  XQM_CMD(0x5)  /* returns fs_quota_stat_t struct */
+#define Q_XQUOTARM   XQM_CMD(0x6)  /* free quota files' space */
+
+/*
+ * fs_disk_quota structure:
+ *
+ * This contains the current quota information regarding a user/proj/group.
+ * It is 64-bit aligned, and all the blk units are in BBs (Basic Blocks) of
+ * 512 bytes.
+ */
+#define FS_DQUOT_VERSION       1       /* fs_disk_quota.d_version */
+typedef struct fs_disk_quota {
+       __s8            d_version;      /* version of this structure */
+       __s8            d_flags;        /* XFS_{USER,PROJ,GROUP}_QUOTA */
+       __u16           d_fieldmask;    /* field specifier */
+       __u32           d_id;           /* user, project, or group ID */
+       __u64           d_blk_hardlimit;/* absolute limit on disk blks */
+       __u64           d_blk_softlimit;/* preferred limit on disk blks */
+       __u64           d_ino_hardlimit;/* maximum # allocated inodes */
+       __u64           d_ino_softlimit;/* preferred inode limit */
+       __u64           d_bcount;       /* # disk blocks owned by the user */
+       __u64           d_icount;       /* # inodes owned by the user */
+       __s32           d_itimer;       /* zero if within inode limits */
+                                       /* if not, we refuse service */
+       __s32           d_btimer;       /* similar to above; for disk blocks */
+       __u16           d_iwarns;       /* # warnings issued wrt num inodes */
+       __u16           d_bwarns;       /* # warnings issued wrt disk blocks */
+       __s32           d_padding2;     /* padding2 - for future use */
+       __u64           d_rtb_hardlimit;/* absolute limit on realtime blks */
+       __u64           d_rtb_softlimit;/* preferred limit on RT disk blks */
+       __u64           d_rtbcount;     /* # realtime blocks owned */
+       __s32           d_rtbtimer;     /* similar to above; for RT disk blks */
+       __u16           d_rtbwarns;     /* # warnings issued wrt RT disk blks */
+       __s16           d_padding3;     /* padding3 - for future use */ 
+       char            d_padding4[8];  /* yet more padding */
+} fs_disk_quota_t;
+
+/*
+ * These fields are sent to Q_XSETQLIM to specify fields that need to change.
+ */
+#define FS_DQ_ISOFT    (1<<0)
+#define FS_DQ_IHARD    (1<<1)
+#define FS_DQ_BSOFT    (1<<2)
+#define FS_DQ_BHARD    (1<<3)
+#define FS_DQ_RTBSOFT  (1<<4)
+#define FS_DQ_RTBHARD  (1<<5)
+#define FS_DQ_LIMIT_MASK       (FS_DQ_ISOFT | FS_DQ_IHARD | FS_DQ_BSOFT | \
+                                FS_DQ_BHARD | FS_DQ_RTBSOFT | FS_DQ_RTBHARD)
+/*
+ * These timers can only be set in the superuser's dquot. For others, timers
+ * are automatically started and stopped. The superuser's timer values set
+ * the limits for the rest.  In case these values are zero, the
+ * DQ_{F,B}TIMELIMIT values defined below are used.
+ * These values also apply only to the d_fieldmask field for Q_XSETQLIM.
+ */
+#define FS_DQ_BTIMER   (1<<6)
+#define FS_DQ_ITIMER   (1<<7)
+#define FS_DQ_RTBTIMER         (1<<8)
+#define FS_DQ_TIMER_MASK       (FS_DQ_BTIMER | FS_DQ_ITIMER | FS_DQ_RTBTIMER)
+
+/*
+ * The following constants define the default amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure).  These may be modified by the quotactl(2)
+ * system call with the Q_XSETQLIM command.
+ */
+#define        DQ_FTIMELIMIT   (7 * 24*60*60)          /* 1 week */
+#define        DQ_BTIMELIMIT   (7 * 24*60*60)          /* 1 week */
+
+/*
+ * Various flags related to quotactl(2).  Only relevant to XFS filesystems.
+ */
+#define XFS_QUOTA_UDQ_ACCT     (1<<0)  /* user quota accounting */
+#define XFS_QUOTA_UDQ_ENFD     (1<<1)  /* user quota limits enforcement */
+#define XFS_QUOTA_PDQ_ACCT     (1<<2)  /* project quota accounting */
+#define XFS_QUOTA_PDQ_ENFD     (1<<3)  /* project quota limits enforcement */
+#define XFS_QUOTA_GDQ_ACCT     (1<<4)  /* group quota accounting */
+#define XFS_QUOTA_GDQ_ENFD     (1<<5)  /* group quota limits enforcement */
+
+#define XFS_USER_QUOTA         (1<<0)  /* user quota type */
+#define XFS_PROJ_QUOTA         (1<<1)  /* project quota type */
+#define XFS_GROUP_QUOTA                (1<<2)  /* group quota type */
+
+/*
+ * fs_quota_stat is the struct returned in Q_XGETQSTAT for a given file system.
+ * Provides a centralized way to get meta-information about the quota subsystem.
+ * eg. space taken up for user and aggregate quotas, number of dquots currently
+ * incore.
+ */
+#define FS_QSTAT_VERSION       1       /* fs_quota_stat.qs_version */
+
+/*
+ * Some basic information about 'quota files'.
+ */
+typedef struct fs_qfilestat {
+       __u64           qfs_ino;        /* inode number */
+       __u64           qfs_nblks;      /* number of BBs 512-byte-blks */
+       __u32           qfs_nextents;   /* number of extents */
+} fs_qfilestat_t;
+
+typedef struct fs_quota_stat {
+       __s8            qs_version;     /* version number for future changes */
+       __u16           qs_flags;       /* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */
+       __s8            qs_pad;         /* unused */
+       fs_qfilestat_t  qs_uquota;      /* user quota storage information */
+       fs_qfilestat_t  qs_aquota;      /* aggr quota storage information */
+       __u32           qs_incoredqs;   /* number of dquots incore */
+       __s32           qs_btimelimit;  /* limit for blks timer */      
+       __s32           qs_itimelimit;  /* limit for inodes timer */    
+       __s32           qs_rtbtimelimit;/* limit for rt blks timer */   
+       __u16           qs_bwarnlimit;  /* limit for num warnings */
+       __u16           qs_iwarnlimit;  /* limit for num warnings */
+} fs_quota_stat_t;
+
+
+#ifdef __KERNEL__
+extern int xqm_quotactl(int, const char *, int, caddr_t);
+#endif
+
+#endif /* __XQM_H__ */
diff --git a/libxfs/Makefile b/libxfs/Makefile
new file mode 100644 (file)
index 0000000..ce45344
--- /dev/null
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+STATICLIBTARGET = libxfs.a
+HFILES = xfs.h
+CFILES = arch.c init.c logitem.c rdwr.c trans.c util.c \
+       xfs_bit.c xfs_rtbit.c xfs_alloc.c xfs_ialloc.c xfs_rtalloc.c \
+       xfs_inode.c xfs_btree.c xfs_alloc_btree.c xfs_ialloc_btree.c \
+       xfs_bmap_btree.c xfs_da_btree.c xfs_dir.c xfs_dir_leaf.c \
+       xfs_dir2.c xfs_dir2_leaf.c xfs_attr_leaf.c xfs_dir2_block.c \
+       xfs_dir2_node.c xfs_dir2_data.c xfs_dir2_sf.c xfs_bmap.c \
+       xfs_mount.c xfs_trans.c
+
+# xfs_repair is braindead, don't try linking it with a debug libxfs yet.
+DEBUG = -DNDEBUG
+
+# 
+# Tracing flags:
+# -DIO_DEBUG           reads and writes of buffers
+# -DMEM_DEBUG          all zone memory use
+# -DLI_DEBUG           log item (ino/buf) manipulation
+# -DXACT_DEBUG         transaction state changes
+# 
+LCFLAGS += -Wno-unknown-pragmas -Wno-unused -Wno-uninitialized -I.
+
+default: $(STATICLIBTARGET)
+
+include $(BUILDRULES)
+
+install: default
diff --git a/libxfs/init.c b/libxfs/init.c
new file mode 100644 (file)
index 0000000..e3142dd
--- /dev/null
@@ -0,0 +1,764 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#define ustat __kernel_ustat
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <mntent.h>
+#include <sys/stat.h>
+#undef ustat
+#include <sys/ustat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+
+#ifndef BLKSETSIZE     /* Baaad m'kay, but it's not in libc yet */
+#define BLKSETSIZE _IO(0x12,108)       /* set device block size */
+#endif
+
+#define findrawpath(x) x
+#define findblockpath(x) x
+
+char *progname = "libxfs";     /* default, changed by each tool */
+
+/*
+ * dev_map - map open devices to fd.
+ */
+#define MAX_DEVS 10    /* arbitrary maximum */
+int nextfakedev = -1;  /* device number to give to next fake device */
+static struct dev_to_fd {
+       dev_t dev;
+       int fd;
+} dev_map[MAX_DEVS]={{0}};
+
+/*
+ * Report whether "block" is a block device for which ustat() succeeds,
+ * which is taken to mean that it holds a mounted filesystem.
+ * Returns 1 in that case (after printing a message naming "name" when
+ * verbose is set); returns 0 if "block" cannot be stat'ed, is not a
+ * block device, or ustat() fails on its device number.
+ */
+static int
+check_ismounted(char *name, char *block, int verbose)
+{
+       struct stat64   sb;
+       struct ustat    ubuf;
+
+       if (stat64(block, &sb) < 0 || (sb.st_mode & S_IFMT) != S_IFBLK)
+               return 0;
+
+       /* a failing ustat() means nothing is mounted from this device */
+       if (ustat(sb.st_rdev, &ubuf) < 0)
+               return 0;
+
+       if (verbose) {
+               fprintf(stderr, "%s: %s contains a mounted filesystem\n",
+                       progname, name);
+       }
+       return 1;
+}
+
+/*
+ * Checks whether a given device holds a mounted, writable filesystem.
+ * If it does (or if that cannot be ruled out) a warning is printed and
+ * "fatal" is returned, so a caller passing fatal == 0 is only warned
+ * and may proceed.  Returns 0 if the device is not mounted, or if its
+ * mount table entry carries the read-only option.
+ *
+ * Useful to tools which will produce uncertain results
+ * if the filesystem is active - repair, check, logprint.
+ */
+static int
+check_isactive(char *name, char *block, int fatal)
+{
+       int             sts = 0;
+       FILE            *f;
+       struct mntent   *mnt;
+       struct stat64   st;
+       struct stat64   mst;
+
+       if (!check_ismounted(name, block, 0))
+               return 0;
+
+       /* without the device number or the mount table we cannot tell */
+       if (stat64(block, &st) < 0 ||
+           (f = setmntent(MOUNTED, "r")) == NULL) {
+               fprintf(stderr,
+                       "%s: %s contains a possibly writable, mounted "
+                       "filesystem\n", progname, name);
+               return fatal;
+       }
+
+       /*
+        * Find the mount table entry for this very device and stop only
+        * if that entry is read-only; entries belonging to other devices
+        * must not influence the result.
+        */
+       while ((mnt = getmntent(f)) != NULL) {
+               if (stat64(mnt->mnt_fsname, &mst) < 0)
+                       continue;
+               if ((mst.st_mode & S_IFMT) != S_IFBLK)
+                       continue;
+               if (mst.st_rdev == st.st_rdev &&
+                   hasmntopt(mnt, MNTOPT_RO) != NULL)
+                       break;
+       }
+       if (mnt == NULL) {
+               /* no read-only entry for this device was found */
+               fprintf(stderr,
+                       "%s: %s contains a writable mounted "
+                       "filesystem\n", progname, name);
+               sts = fatal;
+       }
+       endmntent(f);
+       return sts;
+}
+
+/*
+ * Return the size of "path": st_size >> 9 (i.e. 512-byte units) for a
+ * regular file, otherwise the value stored by the BLKGETSIZE ioctl
+ * (presumably also 512-byte sectors - confirm against the ioctl docs).
+ * Prints a message and exits if the path cannot be examined.
+ */
+static __int64_t
+findsize(char *path)
+{
+       int             fd;
+       unsigned long   size;   /* BLKGETSIZE stores an unsigned long */
+       struct stat64   st;
+
+       /* Test to see if we are dealing with a regular file rather than a
+        * block device, if we are just use the size returned by stat64
+        */
+       if (stat64(path, &st) < 0) {
+               fprintf(stderr, "%s: "
+                       "cannot stat the device special file \"%s\": %s\n",
+                       progname, path, strerror(errno));
+               exit(1);
+       }
+       if ((st.st_mode & S_IFMT) == S_IFREG) {
+               return (__int64_t)(st.st_size >> 9);
+       }
+
+       if ((fd = open(path, O_RDONLY)) < 0) {
+               fprintf(stderr, "%s: "
+                       "error opening the device special file \"%s\": %s\n",
+                       progname, path, strerror(errno));
+               exit(1);
+       }
+       if (ioctl(fd, BLKGETSIZE, &size) < 0) {
+               fprintf(stderr, "%s: can't determine device size\n", progname);
+               close(fd);
+               exit(1);
+       }
+
+       close(fd);
+
+       return (__int64_t)size;
+}
+
+
+/* libxfs_device_to_fd:
+ *     lookup a device number in the device map
+ *     return the associated fd
+ *     prints a message and exits if the device is not in the map
+ */
+int
+libxfs_device_to_fd(dev_t device)
+{
+       int d;
+
+       for (d=0;d<MAX_DEVS;d++)
+               if (dev_map[d].dev == device)
+                       return dev_map[d].fd;
+
+       /* dev_t has no portable conversion; widen it explicitly for %lld */
+       fprintf(stderr, "%s: device_to_fd: device %lld is not open\n",
+               progname, (long long)device);
+       exit(1);
+}
+
+/* libxfs_device_open:
+ *     open a device and return its device number
+ *
+ *     path     - file or device to open
+ *     creat    - non-zero to create/truncate the file (O_CREAT|O_TRUNC)
+ *     readonly - non-zero to open O_RDONLY rather than O_RDWR
+ *
+ *     The fd is recorded in dev_map under the returned device number.
+ *     Prints a message and exits on any failure, including when the
+ *     device is already open or dev_map is full.
+ */
+dev_t
+libxfs_device_open(char *path, int creat, int readonly)
+{
+       int             fd;
+       dev_t           dev;
+       int             d;
+       struct stat64   statb;
+       int             blocksize = 512; /* bytes */
+
+       if ((fd = open(path,
+                       (readonly ? O_RDONLY : O_RDWR) |
+                       (creat ? O_CREAT|O_TRUNC : 0),
+                       0666)) < 0) {
+               fprintf(stderr, "%s: cannot open %s: %s\n",
+                       progname, path, strerror(errno));
+               exit(1);
+       }
+
+       /*
+        * Stat the descriptor we just opened, using the 64-bit interface
+        * like the rest of this file, so that large image files do not
+        * fail and the result always describes the opened object.
+        */
+       if (fstat64(fd, &statb)<0) {
+               fprintf(stderr, "%s: cannot stat %s: %s\n",
+                       progname, path, strerror(errno));
+               exit(1);
+       }
+
+       /* Set device blocksize to 512 bytes */
+       if ((statb.st_mode & S_IFMT) == S_IFBLK) {
+               if (ioctl(fd, BLKSETSIZE, &blocksize) < 0) {
+                       fprintf(stderr, "%s: warning - cannot set blocksize on "
+                               "block device %s: %s\n",
+                               progname, path, strerror(errno));
+               }
+       }
+
+       /* get the device number from the stat buf - unless
+        * we're not opening a real device, in which case
+        * choose a new fake device number
+        */
+       dev=(statb.st_rdev)?(statb.st_rdev):(nextfakedev--);
+
+       /* refuse to open the same device twice */
+       for (d=0;d<MAX_DEVS;d++)
+               if (dev_map[d].dev == dev) {
+                       fprintf(stderr, "%s: device %lld is already open\n",
+                           progname, (long long)dev);
+                       exit(1);
+               }
+
+       /* a zero dev marks a free dev_map slot */
+       for (d=0;d<MAX_DEVS;d++)
+               if (!dev_map[d].dev) {
+                       dev_map[d].dev=dev;
+                       dev_map[d].fd=fd;
+
+                       return dev;
+               }
+
+       fprintf(stderr, "%s: device_open: too many open devices\n", progname);
+       exit(1);
+}
+
+/* libxfs_device_close:
+ *     flush and close the fd recorded for a device number and
+ *     release its dev_map slot
+ *     prints a message and exits if the device is not in the map
+ */
+void
+libxfs_device_close(dev_t dev)
+{
+       int     d;
+
+       for (d=0;d<MAX_DEVS;d++)
+               if (dev_map[d].dev == dev) {
+                       int fd;
+
+                       /* take the descriptor, not the device number */
+                       fd=dev_map[d].fd;
+                       dev_map[d].dev=dev_map[d].fd=0;
+
+                       fsync(fd);
+                       ioctl(fd, BLKFLSBUF, 0);
+                       close(fd);
+
+                       return;
+               }
+
+       fprintf(stderr, "%s: device_close: device %lld is not open\n",
+                       progname, (long long)dev);
+       ASSERT(0);
+       exit(1);
+}
+
+
+/*
+ * libxfs initialization.
+ * Caller gets a 0 on failure (and we print a message), 1 on success.
+ *
+ * On entry the device numbers, file descriptors and sizes in *a are
+ * reset (devices to 0, fds to -1, sizes to 0).  Then, in order:
+ *
+ *  - if a->volname is set it is validated (stat, raw/block path lookup,
+ *    mounted/active checks) and opened read-only.  Without
+ *    HAVE_VOLUME_MANAGER the name is never accepted as a volume: when
+ *    a->notvolok is set it is reused as the data device name, otherwise
+ *    an error (plus a->notvolmsg, used as a printf format) is printed.
+ *    With HAVE_VOLUME_MANAGER, subvolumes the caller did not name get a
+ *    temporary character device node under /tmp, removed again at done:.
+ *  - the data, log and realtime names are each opened through
+ *    libxfs_device_open().  When the matching a->disfile / a->lisfile /
+ *    a->risfile flag is set the name is opened directly and no size is
+ *    recorded; otherwise the raw path is opened after the same checks
+ *    as above and the size comes from findsize().
+ *
+ * On failure every device opened so far is closed again.
+ *
+ * NOTE(review): the chdir(curdir) calls suggest findrawpath() or
+ * findblockpath() may change the working directory -- confirm.
+ * NOTE(review): the "< 0" size checks assume findsize() reports failure
+ * with a negative value -- confirm against its definition.
+ */
+int
+libxfs_init(libxfs_init_t *a)
+{
+       char            *blockfile;
+       char            curdir[MAXPATHLEN];
+       char            *dname;
+       char            dpath[25];
+       int             fd;
+       char            *logname;
+       char            logpath[25];
+       int             needcd;
+       char            *rawfile;
+       char            *rtname;
+       char            rtpath[25];
+       int             rval = 0;
+       int             readonly;
+       int             inactive;
+       struct stat64   stbuf;
+
+       dpath[0] = logpath[0] = rtpath[0] = '\0';
+       dname = a->dname;
+       logname = a->logname;
+       rtname = a->rtname;
+       a->ddev = a->logdev = a->rtdev = 0;
+       a->dfd = a->logfd = a->rtfd = -1;
+       a->dsize = a->logBBsize = a->logBBstart = a->rtsize = 0;
+
+       (void)getcwd(curdir,MAXPATHLEN);        /* so we can chdir back later */
+       needcd = 0;
+       fd = -1;
+       readonly = (a->isreadonly & LIBXFS_ISREADONLY);
+       inactive = (a->isreadonly & LIBXFS_ISINACTIVE);
+       if (a->volname) {       /* a volume name was given: check it first */
+               if (stat64(a->volname, &stbuf) < 0) {
+                       perror(a->volname);
+                       goto done;
+               }
+               if (!(rawfile = findrawpath(a->volname))) {
+                       fprintf(stderr, "%s: "
+                               "can't find a character device matching %s\n",
+                               progname, a->volname);
+                       goto done;
+               }
+               if (!(blockfile = findblockpath(a->volname))) {
+                       fprintf(stderr, "%s: "
+                               "can't find a block device matching %s\n",
+                               progname, a->volname);
+                       goto done;
+               }
+               if (!readonly && !inactive && check_ismounted(
+                                       a->volname, blockfile, 1))
+                       goto done;
+               if (inactive && check_isactive(
+                                       a->volname, blockfile, readonly))
+                       goto done;
+               needcd = 1;
+               fd = open(rawfile, O_RDONLY);   /* closed again at done: */
+#ifdef HAVE_VOLUME_MANAGER
+               xlv_getdev_t getdev;
+               if (ioctl(fd, DIOCGETVOLDEV, &getdev) < 0)
+#else
+               if (1)  /* no volume manager: the name is never a volume */
+#endif
+               {
+                       if (a->notvolok) {
+                               dname = a->dname = a->volname;
+                               a->volname = NULL;
+                               goto voldone;   /* use it as the data device */
+                       }
+                       fprintf(stderr, "%s: "
+                               "%s is not a volume device name\n",
+                               progname, a->volname);
+                       if (a->notvolmsg)
+                               fprintf(stderr, a->notvolmsg, a->volname);
+                       goto done;
+               }
+#ifdef HAVE_VOLUME_MANAGER
+               if (getdev.data_subvol_dev && dname) {
+                       fprintf(stderr, "%s: "
+                               "%s has a data subvolume, cannot specify %s\n",
+                               progname, a->volname, dname);
+                       goto done;
+               }
+               if (getdev.log_subvol_dev && logname) {
+                       fprintf(stderr, "%s: "
+                               "%s has a log subvolume, cannot specify %s\n",
+                               progname, a->volname, logname);
+                       goto done;
+               }
+               if (getdev.rt_subvol_dev && rtname) {
+                       fprintf(stderr, "%s: %s has a realtime subvolume, "
+                               "cannot specify %s\n",
+                               progname, a->volname, rtname);
+                       goto done;
+               }
+               if (!dname && getdev.data_subvol_dev) {
+                       strcpy(dpath, "/tmp/libxfsdXXXXXX");
+                       (void)mktemp(dpath);
+                       if (mknod(dpath, S_IFCHR | 0600,
+                                 getdev.data_subvol_dev) < 0) {
+                               fprintf(stderr, "%s: mknod failed: %s\n",
+                                       progname, strerror(errno));
+                               goto done;
+                       }
+                       dname = dpath;
+               }
+               if (!logname && getdev.log_subvol_dev) {
+                       strcpy(logpath, "/tmp/libxfslXXXXXX");
+                       (void)mktemp(logpath);
+                       if (mknod(logpath, S_IFCHR | 0600,
+                                 getdev.log_subvol_dev) < 0) {
+                               fprintf(stderr, "%s: mknod failed: %s\n",
+                                       progname, strerror(errno));
+                               goto done;
+                       }
+                       logname = logpath;
+               }
+               if (!rtname && getdev.rt_subvol_dev) {
+                       strcpy(rtpath, "/tmp/libxfsrXXXXXX");
+                       (void)mktemp(rtpath);
+                       if (mknod(rtpath, S_IFCHR | 0600,
+                                 getdev.rt_subvol_dev) < 0) {
+                               fprintf(stderr, "%s: mknod failed: %s\n",
+                                       progname, strerror(errno));
+                               goto done;
+                       }
+                       rtname = rtpath;
+               }
+#endif
+       }
+voldone:
+       if (dname) {    /* data device */
+               if (dname[0] != '/' && needcd)
+                       chdir(curdir);
+               if (a->disfile) {
+                       a->ddev= libxfs_device_open(dname, a->dcreat, readonly);
+                       a->dfd = libxfs_device_to_fd(a->ddev);
+               } else {
+                       if (stat64(dname, &stbuf) < 0) {
+                               fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+                                       progname, dname, strerror(errno));
+                               goto done;
+                       }
+                       if (!(rawfile = findrawpath(dname))) {
+                               fprintf(stderr, "%s: can't find a char device "
+                                       "matching %s\n", progname, dname);
+                               goto done;
+                       }
+                       if (!(blockfile = findblockpath(dname))) {
+                               fprintf(stderr, "%s: can't find a block device "
+                                       "matching %s\n", progname, dname);
+                               goto done;
+                       }
+                       if (!readonly && !inactive && check_ismounted(
+                                               dname, blockfile, 1))
+                               goto done;
+                       if (inactive && check_isactive(
+                                               dname, blockfile, readonly))
+                               goto done;
+                       a->ddev = libxfs_device_open(rawfile,
+                                       a->dcreat, readonly);
+                       a->dfd = libxfs_device_to_fd(a->ddev);
+                       a->dsize = findsize(rawfile);
+               }
+               needcd = 1;
+       } else
+               a->dsize = 0;
+       if (logname) {  /* log device */
+               if (logname[0] != '/' && needcd)
+                       chdir(curdir);
+               if (a->lisfile) {
+                       a->logdev = libxfs_device_open(logname,
+                                       a->lcreat, readonly);
+                       a->logfd = libxfs_device_to_fd(a->logdev);
+               } else {
+                       if (stat64(logname, &stbuf) < 0) {
+                               fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+                                       progname, logname, strerror(errno));
+                               goto done;
+                       }
+                       if (!(rawfile = findrawpath(logname))) {
+                               fprintf(stderr, "%s: can't find a char device "
+                                       "matching %s\n", progname, logname);
+                               goto done;
+                       }
+                       if (!(blockfile = findblockpath(logname))) {
+                               fprintf(stderr, "%s: can't find a block device "
+                                       "matching %s\n", progname, logname);
+                               goto done;
+                       }
+                       if (!readonly && !inactive && check_ismounted(
+                                               logname, blockfile, 1))
+                               goto done;
+                       else if (inactive && check_isactive(
+                                               logname, blockfile, readonly))
+                               goto done;
+                       a->logdev = libxfs_device_open(rawfile,
+                                       a->lcreat, readonly);
+                       a->logfd = libxfs_device_to_fd(a->logdev);
+                       a->logBBsize = findsize(rawfile);
+               }
+               needcd = 1;
+       } else
+               a->logBBsize = 0;
+       if (rtname) {   /* realtime device */
+               if (rtname[0] != '/' && needcd)
+                       chdir(curdir);
+               if (a->risfile) {
+                       a->rtdev = libxfs_device_open(rtname,
+                                       a->rcreat, readonly);
+                       a->rtfd = libxfs_device_to_fd(a->rtdev);
+               } else {
+                       if (stat64(rtname, &stbuf) < 0) {
+                               fprintf(stderr, "%s: stat64 failed on %s: %s\n",
+                                       progname, rtname, strerror(errno));
+                               goto done;
+                       }
+                       if (!(rawfile = findrawpath(rtname))) {
+                               fprintf(stderr, "%s: can't find a char device "
+                                       "matching %s\n", progname, rtname);
+                               goto done;
+                       }
+                       if (!(blockfile = findblockpath(rtname))) {
+                               fprintf(stderr, "%s: can't find a block device "
+                                       "matching %s\n", progname, rtname);
+                               goto done;
+                       }
+                       if (!readonly && !inactive && check_ismounted(
+                                               rtname, blockfile, 1))
+                               goto done;
+                       if (inactive && check_isactive(
+                                               rtname, blockfile, readonly))
+                               goto done;
+                       a->rtdev = libxfs_device_open(rawfile,
+                                       a->rcreat, readonly);
+                       a->rtfd = libxfs_device_to_fd(a->rtdev);
+                       a->rtsize = findsize(rawfile);
+               }
+               needcd = 1;
+       } else
+               a->rtsize = 0;
+       if (a->dsize < 0) {
+               fprintf(stderr, "%s: can't get size for data subvolume\n",
+                       progname);
+               goto done;
+       }
+       if (a->logBBsize < 0) {
+               fprintf(stderr, "%s: can't get size for log subvolume\n",
+                       progname);
+               goto done;
+       }
+       if (a->rtsize < 0) {
+               fprintf(stderr, "%s: can't get size for realtime subvolume\n",
+                       progname);
+               goto done;
+       }
+       if (needcd)
+               chdir(curdir);
+       rval = 1;       /* everything opened and sized */
+done:  /* common exit: drop temp nodes and the volume fd; undo opens on failure */
+       if (dpath[0])
+               unlink(dpath);
+       if (logpath[0])
+               unlink(logpath);
+       if (rtpath[0])
+               unlink(rtpath);
+       if (fd >= 0)
+               close(fd);
+       if (!rval && a->ddev)
+               libxfs_device_close(a->ddev);
+       if (!rval && a->logdev)
+               libxfs_device_close(a->logdev);
+       if (!rval && a->rtdev)
+               libxfs_device_close(a->rtdev);
+       return rval;
+}
+
+
+/*
+ * Initialize/destroy all of the zone allocators we use.
+ *
+ * release == 0: create every zone with libxfs_zone_init() and then call
+ *               xfs_dir_startup().
+ * release != 0: hand every zone created above back with libxfs_free().
+ */
+static void
+manage_zones(int release)
+{
+       extern xfs_zone_t       *xfs_ili_zone;
+       extern xfs_zone_t       *xfs_inode_zone;
+       extern xfs_zone_t       *xfs_ifork_zone;
+       extern xfs_zone_t       *xfs_dabuf_zone;
+       extern xfs_zone_t       *xfs_buf_item_zone;
+       extern xfs_zone_t       *xfs_da_state_zone;
+       extern xfs_zone_t       *xfs_btree_cur_zone;
+       extern xfs_zone_t       *xfs_bmap_free_item_zone;
+       extern void             xfs_dir_startup();
+
+       if (release) {  /* free zone allocation */
+               libxfs_free(xfs_inode_zone);
+               libxfs_free(xfs_ifork_zone);
+               libxfs_free(xfs_dabuf_zone);
+               /* the inode log item zone is created below too: free it */
+               libxfs_free(xfs_ili_zone);
+               libxfs_free(xfs_buf_item_zone);
+               libxfs_free(xfs_da_state_zone);
+               libxfs_free(xfs_btree_cur_zone);
+               libxfs_free(xfs_bmap_free_item_zone);
+               return;
+       }
+       /* otherwise initialise zone allocation */
+       xfs_inode_zone = libxfs_zone_init(sizeof(xfs_inode_t), "xfs_inode");
+       xfs_ifork_zone = libxfs_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+       xfs_dabuf_zone = libxfs_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+       xfs_ili_zone = libxfs_zone_init(
+                       sizeof(xfs_inode_log_item_t), "xfs_inode_log_item");
+       xfs_buf_item_zone = libxfs_zone_init(
+                       sizeof(xfs_buf_log_item_t), "xfs_buf_log_item");
+       xfs_da_state_zone = libxfs_zone_init(
+                       sizeof(xfs_da_state_t), "xfs_da_state");
+       xfs_btree_cur_zone = libxfs_zone_init(
+                       sizeof(xfs_btree_cur_t), "xfs_btree_cur");
+       xfs_bmap_free_item_zone = libxfs_zone_init(
+                       sizeof(xfs_bmap_free_item_t), "xfs_bmap_free_item");
+       xfs_dir_startup();
+}
+
+/*
+ * Load the realtime bitmap and summary inodes named in the superblock
+ * into mp->m_rbmip and mp->m_rsumip.
+ *
+ * Returns 0 on success, or when the superblock names no bitmap inode;
+ * otherwise returns the libxfs_iread() error after printing a message.
+ */
+static int
+rtmount_inodes(xfs_mount_t *mp)
+{
+       xfs_sb_t        *sbp = &mp->m_sb;
+       int             rc;
+
+       /* no realtime bitmap inode recorded: nothing to read */
+       if (sbp->sb_rbmino == NULLFSINO)
+               return 0;
+
+       rc = libxfs_iread(mp, NULL, sbp->sb_rbmino, &mp->m_rbmip, 0);
+       if (rc != 0) {
+               fprintf(stderr, "%s: cannot read realtime bitmap inode (%d)\n",
+                       progname, rc);
+               return rc;
+       }
+       ASSERT(mp->m_rbmip != NULL);
+       ASSERT(sbp->sb_rsumino != NULLFSINO);
+
+       rc = libxfs_iread(mp, NULL, sbp->sb_rsumino, &mp->m_rsumip, 0);
+       if (rc != 0) {
+               fprintf(stderr, "%s: cannot read realtime summary inode (%d)\n",
+                       progname, rc);
+               return rc;
+       }
+       ASSERT(mp->m_rsumip != NULL);
+
+       return 0;
+}
+
+/*
+ * Mount structure initialization, provides a filled-in xfs_mount_t
+ * such that the numerous XFS_* macros can be used.  If dev is zero,
+ * no IO will be performed (no size checks, read root inodes).
+ *
+ *   mp         mount structure to fill in
+ *   sb         superblock, copied into mp->m_sb
+ *   dev        data device (0 means "compute only, do no I/O")
+ *   logdev     log device
+ *   rtdev      realtime device
+ *   rrootinos  non-zero to also read the root and realtime inodes
+ *
+ * Returns mp on success and NULL (after printing a message) on failure.
+ * The program exits if the per-AG array cannot be allocated.
+ *
+ * Note that the block-count round-trip check on sb_dblocks is done even
+ * when dev is zero; only the device reads are skipped.
+ */
+xfs_mount_t *
+libxfs_mount(
+       xfs_mount_t     *mp,
+       xfs_sb_t        *sb,
+       dev_t           dev,
+       dev_t           logdev,
+       dev_t           rtdev,
+       int             rrootinos)
+{
+       xfs_daddr_t     d;
+       xfs_buf_t       *bp;
+       xfs_sb_t        *sbp;
+       size_t          size;
+       int             error;
+
+       mp->m_dev = dev;
+       mp->m_rtdev = rtdev;
+       mp->m_logdev = logdev;
+       mp->m_sb = *sb;
+       sbp = &(mp->m_sb);
+       manage_zones(0);
+
+       libxfs_mount_common(mp, sb);
+
+       libxfs_alloc_compute_maxlevels(mp);
+       libxfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
+       libxfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
+       libxfs_ialloc_compute_maxlevels(mp);
+
+       if (sbp->sb_imax_pct) {
+               /* Make sure the maximum inode count is a multiple of the
+                * units we allocate inodes in.
+                */
+               mp->m_maxicount = (sbp->sb_dblocks * sbp->sb_imax_pct) / 100;
+               mp->m_maxicount = ((mp->m_maxicount / mp->m_ialloc_blks) *
+                                 mp->m_ialloc_blks)  << sbp->sb_inopblog;
+       } else
+               mp->m_maxicount = 0;
+
+       mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+
+       /*
+        * Set whether we're using inode alignment.
+        */
+       if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+           mp->m_sb.sb_inoalignmt >=
+           XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+               mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
+       else
+               mp->m_inoalign_mask = 0;
+       /*
+        * If we are using stripe alignment, check whether
+        * the stripe unit is a multiple of the inode alignment
+        */
+       if (   mp->m_dalign
+           && mp->m_inoalign_mask && !(mp->m_dalign & mp->m_inoalign_mask))
+               mp->m_sinoalign = mp->m_dalign;
+       else
+               mp->m_sinoalign = 0;
+
+       /*
+        * Check that the data (and log if separate) are an ok size.
+        */
+       d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+       if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
+               fprintf(stderr, "%s: size check failed\n", progname);
+               return NULL;
+       }
+
+       /* Initialize the appropriate directory manager */
+       if (XFS_SB_VERSION_HASDIRV2(sbp))
+               libxfs_dir2_mount(mp);
+       else
+               libxfs_dir_mount(mp);
+
+       /* Initialize the precomputed transaction reservations values */
+       libxfs_trans_init(mp);
+
+       if (dev == 0)   /* maxtrres, we have no device so leave now */
+               return mp;
+
+       /* the last basic block of the data device must be readable */
+       bp = libxfs_readbuf(mp->m_dev, d - 1, 1, 0);
+       if (bp == NULL) {
+               fprintf(stderr, "%s: data size check failed\n", progname);
+               return NULL;
+       }
+       libxfs_putbuf(bp);
+
+       /* same check for an external log */
+       if (mp->m_logdev && mp->m_logdev != mp->m_dev) {
+               d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+               if ( (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) ||
+                    (!(bp = libxfs_readbuf(mp->m_logdev, d - 1, 1, 1)))) {
+                       fprintf(stderr, "%s: log size checks failed\n",
+                                       progname);
+                       return NULL;
+               }
+               libxfs_putbuf(bp);
+       }
+
+       /* Initialize realtime fields in the mount structure */
+       if (libxfs_rtmount_init(mp)) {
+               fprintf(stderr, "%s: real-time device init failed\n", progname);
+               return NULL;
+       }
+
+       /*
+        * Allocate and initialize the per-ag data.  Passing the count and
+        * element size separately lets calloc reject an overflowing
+        * product; size is kept only for the error message, and is cast
+        * because a size_t must not be printed with %d.
+        */
+       size = sbp->sb_agcount * sizeof(xfs_perag_t);
+       mp->m_perag = calloc(sbp->sb_agcount, sizeof(xfs_perag_t));
+       if (mp->m_perag == NULL) {
+               fprintf(stderr, "%s: failed to alloc %lu bytes: %s\n",
+                       progname, (unsigned long)size, strerror(errno));
+               exit(1);
+       }
+
+       /*
+        * mkfs calls mount before the root inode is allocated.
+        */
+       if (rrootinos && sbp->sb_rootino != NULLFSINO) {
+               error = libxfs_iread(mp, NULL, sbp->sb_rootino,
+                               &mp->m_rootip, 0);
+               if (error) {
+                       fprintf(stderr, "%s: cannot read root inode (%d)\n",
+                               progname, error);
+                       return NULL;
+               }
+               ASSERT(mp->m_rootip != NULL);
+       }
+       if (rrootinos && rtmount_inodes(mp))
+               return NULL;
+       return mp;
+}
+
+/*
+ * Release the resources obtained during a mount: the per-AG array
+ * hanging off the mount structure and the zone allocators.
+ */
+void
+libxfs_umount(xfs_mount_t *mp)
+{
+       free(mp->m_perag);
+       manage_zones(1);
+}
diff --git a/libxfs/logitem.c b/libxfs/logitem.c
new file mode 100644 (file)
index 0000000..b261060
--- /dev/null
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+xfs_zone_t     *xfs_buf_item_zone;
+xfs_zone_t     *xfs_ili_zone;          /* inode log item zone */
+
+
+/*
+ * This is called to add the given log item to the transaction's
+ * list of log items.  It must find a free log item descriptor
+ * or allocate a new one and add the item to that descriptor.
+ * The function returns a pointer to item descriptor used to point
+ * to the new item.  The log item will now point to its new descriptor
+ * with its li_desc field.
+ *
+ * tp->t_items_free counts the unclaimed descriptors over all chunks.
+ * When it is zero a new chunk is allocated and linked in directly
+ * behind the chunk embedded in the transaction (tp->t_items), and its
+ * slot 0 is handed out.  Otherwise the chunk list is walked for the
+ * first chunk with a vacancy.
+ *
+ * NOTE(review): the walk relies on the ASSERTs to guarantee a vacancy
+ * exists.  If ASSERT compiles to nothing and no chunk has one, licp is
+ * NULL and i is uninitialized at the XFS_LIC_CLAIM below -- confirm
+ * t_items_free can never disagree with the chunks' free masks.
+ */
+xfs_log_item_desc_t *
+xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
+{
+       xfs_log_item_desc_t     *lidp;
+       xfs_log_item_chunk_t    *licp;
+       int                     i;
+
+       /*
+        * If there are no free descriptors, allocate a new chunk
+        * of them and put it at the front of the chunk list.
+        */
+       if (tp->t_items_free == 0) {
+               licp = (xfs_log_item_chunk_t*)
+                      kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP);
+               ASSERT(licp != NULL);
+               /*
+                * Initialize the chunk, and then
+                * claim the first slot in the newly allocated chunk.
+                */
+               XFS_LIC_INIT(licp);
+               XFS_LIC_CLAIM(licp, 0);
+               licp->lic_unused = 1;
+               XFS_LIC_INIT_SLOT(licp, 0);
+               lidp = XFS_LIC_SLOT(licp, 0);
+
+               /*
+                * Link in the new chunk and update the free count.
+                */
+               licp->lic_next = tp->t_items.lic_next;
+               tp->t_items.lic_next = licp;
+               tp->t_items_free = XFS_LIC_NUM_SLOTS - 1;       /* slot 0 is taken */
+
+               /*
+                * Initialize the descriptor and the generic portion
+                * of the log item.
+                *
+                * Point the new slot at this item and return it.
+                * Also point the log item at its currently active
+                * descriptor and set the item's mount pointer.
+                */
+               lidp->lid_item = lip;
+               lidp->lid_flags = 0;
+               lidp->lid_size = 0;
+               lip->li_desc = lidp;
+               lip->li_mountp = tp->t_mountp;
+               return (lidp);
+       }
+
+       /*
+        * Find the free descriptor. It is somewhere in the chunklist
+        * of descriptors.
+        */
+       licp = &tp->t_items;
+       while (licp != NULL) {
+               if (XFS_LIC_VACANCY(licp)) {
+                       if (licp->lic_unused <= XFS_LIC_MAX_SLOT) {
+                               i = licp->lic_unused;   /* next never-used slot */
+                               ASSERT(XFS_LIC_ISFREE(licp, i));
+                               break;
+                       }
+                       for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) {
+                               if (XFS_LIC_ISFREE(licp, i))
+                                       break;
+                       }
+                       ASSERT(i <= XFS_LIC_MAX_SLOT);
+                       break;
+               }
+               licp = licp->lic_next;
+       }
+       ASSERT(licp != NULL);
+       /*
+        * If we find a free descriptor, claim it,
+        * initialize it, and return it.
+        */
+       XFS_LIC_CLAIM(licp, i);
+       if (licp->lic_unused <= i) {    /* first time this slot is used */
+               licp->lic_unused = i + 1;
+               XFS_LIC_INIT_SLOT(licp, i);
+       }
+       lidp = XFS_LIC_SLOT(licp, i);
+       tp->t_items_free--;
+       lidp->lid_item = lip;
+       lidp->lid_flags = 0;
+       lidp->lid_size = 0;
+       lip->li_desc = lidp;
+       lip->li_mountp = tp->t_mountp;
+       return (lidp);
+}
+
+/*
+ * Give a descriptor back to its transaction.
+ *
+ * The slot's bit is released in the owning chunk's free mask, the log
+ * item's back pointer is cleared and the transaction's free count goes
+ * up by one.  A chunk that ends up completely free is unlinked and
+ * returned to the heap, unless it is the chunk embedded in the
+ * transaction itself.
+ */
+void
+xfs_trans_free_item(xfs_trans_t        *tp, xfs_log_item_desc_t *lidp)
+{
+       uint                    slot = XFS_LIC_DESC_TO_SLOT(lidp);
+       xfs_log_item_chunk_t    *licp = XFS_LIC_DESC_TO_CHUNK(lidp);
+       xfs_log_item_chunk_t    **licpp;
+
+       XFS_LIC_RELSE(licp, slot);
+       lidp->lid_item->li_desc = NULL;
+       tp->t_items_free++;
+
+       /* keep chunks that are still in use, and always keep the embedded one */
+       if (!XFS_LIC_ARE_ALL_FREE(licp) || licp == &(tp->t_items))
+               return;
+
+       /*
+        * The list is singly linked, so walk from the head to find the
+        * link that points at this chunk.  The lists are short and this
+        * is not a performance path.
+        */
+       for (licpp = &(tp->t_items.lic_next); *licpp != licp;
+            licpp = &((*licpp)->lic_next)) {
+               ASSERT(*licpp != NULL);
+       }
+       *licpp = licp->lic_next;
+       kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+
+       /* the freed chunk's slots no longer count as available */
+       tp->t_items_free -= XFS_LIC_NUM_SLOTS;
+}
+
+/*
+ * This is called to find the descriptor corresponding to the given
+ * log item.  It returns a pointer to the descriptor.
+ * The log item MUST have a corresponding descriptor in the given
+ * transaction.  This routine does not return NULL, it panics.
+ *
+ * The descriptor pointer is kept in the log item's li_desc field.
+ * Just return it.  The tp argument is not looked at here, and the
+ * "panic" is the ASSERT below.
+ */
+xfs_log_item_desc_t *
+xfs_trans_find_item(xfs_trans_t        *tp, xfs_log_item_t *lip)
+{
+       ASSERT(lip->li_desc != NULL);
+
+       return (lip->li_desc);
+}
+
+/*
+ * Unlock every item held by a transaction and release all of its
+ * descriptors.
+ *
+ * The chunk embedded in the transaction is only marked empty; every
+ * chunk chained behind it is unlocked and then freed.  Afterwards the
+ * transaction is back to a single, empty, embedded chunk.
+ */
+void
+xfs_trans_free_items(
+       xfs_trans_t     *tp,
+       int             flags)
+{
+       xfs_log_item_chunk_t    *licp = &tp->t_items;
+       xfs_log_item_chunk_t    *next_licp;
+       int                     aborting = flags & XFS_TRANS_ABORT;
+
+       /* the embedded chunk is reset in place, never freed */
+       if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+               (void) xfs_trans_unlock_chunk(licp, 1, aborting, NULLCOMMITLSN);
+               XFS_LIC_ALL_FREE(licp);
+               licp->lic_unused = 0;
+       }
+
+       /* every chained chunk: unlock its items, then free the chunk */
+       for (licp = licp->lic_next; licp != NULL; licp = next_licp) {
+               ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
+               (void) xfs_trans_unlock_chunk(licp, 1, aborting, NULLCOMMITLSN);
+               next_licp = licp->lic_next;
+               kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+       }
+
+       /* only the embedded chunk is left, and it is empty */
+       tp->t_items.lic_next = NULL;
+       tp->t_items_free = XFS_LIC_NUM_SLOTS;
+}
+
+/*
+ * Check to see if a buffer matching the given parameters is already
+ * a part of the given transaction.  Only check the first, embedded
+ * chunk, since we don't want to spend all day scanning large transactions.
+ */
+STATIC xfs_buf_t *
+xfs_trans_buf_item_match(
+       xfs_trans_t     *tp,
+       buftarg_t       *target,
+       xfs_daddr_t     blkno,
+       int             len)
+{
+       xfs_log_item_chunk_t    *licp;
+       xfs_log_item_desc_t     *lidp;
+       xfs_buf_log_item_t      *blip;
+       xfs_buf_t                       *bp;
+       int                     i;
+
+#ifdef LI_DEBUG
+       fprintf(stderr, "buf_item_match (fast) log items for xact %p\n", tp);
+#endif
+
+       bp = NULL;
+       len = BBTOB(len);
+       licp = &tp->t_items;
+       if (!XFS_LIC_ARE_ALL_FREE(licp)) {
+               for (i = 0; i < licp->lic_unused; i++) {
+                       /*
+                        * Skip unoccupied slots.
+                        */
+                       if (XFS_LIC_ISFREE(licp, i)) {
+                               continue;
+                       }
+
+                       lidp = XFS_LIC_SLOT(licp, i);
+                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
+#ifdef LI_DEBUG
+                       fprintf(stderr,
+                               "\tfound log item, xact %p, blip=%p (%d/%d)\n",
+                               tp, blip, i, licp->lic_unused);
+#endif
+                       if (blip->bli_item.li_type != XFS_LI_BUF) {
+                               continue;
+                       }
+
+                       bp = blip->bli_buf;
+#ifdef LI_DEBUG
+                       fprintf(stderr,
+                       "\tfound buf %p log item, xact %p, blip=%p (%d)\n",
+                               bp, tp, blip, i);
+#endif
+                       if ((XFS_BUF_TARGET(bp) == target->dev) &&
+                           (XFS_BUF_ADDR(bp) == blkno) &&
+                           (XFS_BUF_COUNT(bp) == len)) {
+                               /*
+                                * We found it.  Break out and
+                                * return the pointer to the buffer.
+                                */
+#ifdef LI_DEBUG
+                               fprintf(stderr,
+                                       "\tfound REAL buf log item, bp=%p\n",
+                                       bp);
+#endif
+                               break;
+                       } else {
+                               bp = NULL;
+                       }
+               }
+       }
+#ifdef LI_DEBUG
+       if (!bp) fprintf(stderr, "\tfast search - got nothing\n");
+#endif
+       return bp;
+}
+
+/*
+ * Check to see if a buffer matching the given parameters is already
+ * a part of the given transaction.  Check all the chunks, we
+ * want to be thorough.
+ *
+ * The device to match comes from target->dev, blkno is the buffer
+ * address and len is converted with BBTOB() below so that it can be
+ * compared against XFS_BUF_COUNT().
+ *
+ * Returns the matching buffer, or NULL when no buf log item in any
+ * chunk of the transaction matches.
+ */
+STATIC xfs_buf_t *
+xfs_trans_buf_item_match_all(
+       xfs_trans_t     *tp,
+       buftarg_t       *target,
+       xfs_daddr_t     blkno,
+       int             len)
+{
+       xfs_log_item_chunk_t    *licp;
+       xfs_log_item_desc_t     *lidp;
+       xfs_buf_log_item_t      *blip;
+       xfs_buf_t               *bp;
+       int                     i;
+
+#ifdef LI_DEBUG
+       fprintf(stderr, "buf_item_match_all (slow) log items for xact %p\n",
+               tp);
+#endif
+
+       len = BBTOB(len);
+       for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
+               if (XFS_LIC_ARE_ALL_FREE(licp)) {
+                       /* only the embedded first chunk may be empty */
+                       ASSERT(licp == &tp->t_items);
+                       ASSERT(licp->lic_next == NULL);
+                       return NULL;
+               }
+               for (i = 0; i < licp->lic_unused; i++) {
+                       /*
+                        * Skip unoccupied slots.
+                        */
+                       if (XFS_LIC_ISFREE(licp, i)) {
+                               continue;
+                       }
+
+                       lidp = XFS_LIC_SLOT(licp, i);
+                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
+#ifdef LI_DEBUG
+                       fprintf(stderr,
+                               "\tfound log item, xact %p, blip=%p (%d/%d)\n",
+                               tp, blip, i, licp->lic_unused);
+#endif
+                       /* only buf log items can carry a buffer */
+                       if (blip->bli_item.li_type != XFS_LI_BUF) {
+                               continue;
+                       }
+
+                       bp = blip->bli_buf;
+                       ASSERT(bp);
+                       ASSERT(XFS_BUF_ADDR(bp));
+#ifdef LI_DEBUG
+                       fprintf(stderr,
+                       "\tfound buf %p log item, xact %p, blip=%p (%d)\n",
+                               bp, tp, blip, i);
+#endif
+                       if ((XFS_BUF_TARGET(bp) == target->dev) &&
+                           (XFS_BUF_ADDR(bp) == blkno) &&
+                           (XFS_BUF_COUNT(bp) == len)) {
+                               /*
+                                * We found it.  Return the pointer
+                                * to the buffer.
+                                */
+#ifdef LI_DEBUG
+                               fprintf(stderr,
+                                       "\tfound REAL buf log item, bp=%p\n",
+                                       bp);
+#endif
+                               return bp;
+                       }
+               }
+       }
+#ifdef LI_DEBUG
+       /*
+        * Reaching this point always means a miss.  Do not test bp here:
+        * it may still hold the last buffer that failed to match.
+        */
+       fprintf(stderr, "slow search - got nothing\n");
+#endif
+       return NULL;
+}
+
+/*
+ * Make sure the given buffer has a buf log item attached.
+ *
+ * The mount pointer and the bdstrat callback are recorded on the
+ * buffer first.  If the buffer's fsprivate field already refers to a
+ * log item of type XFS_LI_BUF there is nothing more to do.  Otherwise
+ * a zeroed buf log item is allocated from xfs_buf_item_zone, filled in
+ * from the buffer's address and byte count, and stored in the
+ * buffer's fsprivate field.
+ */
+void
+xfs_buf_item_init(
+       xfs_buf_t       *bp,
+       xfs_mount_t     *mp)
+{
+       xfs_log_item_t          *existing;
+       xfs_buf_log_item_t      *item;
+
+#ifdef LI_DEBUG
+       fprintf(stderr, "buf_item_init for buffer %p\n", bp);
+#endif
+
+       /* Remember which mount this buffer belongs to. */
+       if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp)
+               XFS_BUF_SET_FSPRIVATE3(bp, mp);
+       XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
+
+       /*
+        * A buf log item that is already attached is simply reused.
+        */
+       existing = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+       if (existing != NULL && existing->li_type == XFS_LI_BUF) {
+#ifdef LI_DEBUG
+               fprintf(stderr,
+                       "reused buf item %p for pre-logged buffer %p\n",
+                       existing, bp);
+#endif
+               return;
+       }
+
+       item = (xfs_buf_log_item_t *)kmem_zone_zalloc(xfs_buf_item_zone,
+                                                    KM_SLEEP);
+#ifdef LI_DEBUG
+       fprintf(stderr, "adding buf item %p for not-logged buffer %p\n",
+               item, bp);
+#endif
+       /* generic log item header */
+       item->bli_item.li_type = XFS_LI_BUF;
+       item->bli_item.li_mountp = mp;
+       item->bli_buf = bp;
+       /* log format describing the buffer */
+       item->bli_format.blf_type = XFS_LI_BUF;
+       item->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
+       item->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
+       XFS_BUF_SET_FSPRIVATE(bp, item);
+}
+
+
+/*
+ * Record that the buffer behind this buf log item has been modified.
+ *
+ * The first and last arguments are accepted but not used by this
+ * implementation: no per-byte bitmap is updated here, only the
+ * XFS_BLI_DIRTY flag is set on the item.
+ */
+void
+xfs_buf_item_log(
+       xfs_buf_log_item_t      *bip,
+       uint                    first,
+       uint                    last)
+{
+       /*
+        * Mark the item as having some dirty data for
+        * quick reference in xfs_buf_item_dirty.
+        */
+       bip->bli_flags |= XFS_BLI_DIRTY;
+}
+
+/*
+ * Allocate and attach the inode log item for an in-core inode that
+ * does not have one yet (asserted), then fill in its header and log
+ * format fields from the inode.
+ */
+void
+xfs_inode_item_init(
+       xfs_inode_t     *ip,
+       xfs_mount_t     *mp)
+{
+       xfs_inode_log_item_t    *item;
+
+       ASSERT(ip->i_itemp == NULL);
+       item = (xfs_inode_log_item_t *)
+                       kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
+       ip->i_itemp = item;
+#ifdef LI_DEBUG
+       fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n",
+               ip->i_ino, item);
+#endif
+
+       /* generic log item header and back pointer */
+       item->ili_item.li_type = XFS_LI_INODE;
+       item->ili_item.li_mountp = mp;
+       item->ili_inode = ip;
+
+       /* log format: identifies the inode and where it lives */
+       item->ili_format.ilf_type = XFS_LI_INODE;
+       item->ili_format.ilf_ino = ip->i_ino;
+       item->ili_format.ilf_blkno = ip->i_blkno;
+       item->ili_format.ilf_len = ip->i_len;
+       item->ili_format.ilf_boffset = ip->i_boffset;
+}
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
new file mode 100644 (file)
index 0000000..06fb1a5
--- /dev/null
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <xfs_log.h>
+#include <xfs_log_priv.h>
+
+#define BBTOOFF64(bbs)  (((xfs_off_t)(bbs)) << BBSHIFT)
+#define BDSTRAT_SIZE    (256 * 1024)
+
+/*
+ * Write zeroes over len basic blocks of the given device, starting at
+ * basic block start.  The work is done in chunks of at most
+ * BDSTRAT_SIZE bytes from a page-aligned zero buffer.
+ *
+ * Any allocation, seek or write failure prints a message and exits.
+ */
+void
+libxfs_device_zero(dev_t dev, xfs_daddr_t start, uint len)
+{
+       xfs_daddr_t     bno;
+       xfs_daddr_t     end;
+       uint            chunk;          /* basic blocks per write */
+       uint            nblks;
+       size_t          size;
+       ssize_t         sts;
+       int             fd;
+       char            *z;
+
+       /* Nothing to zero; also avoids a zero byte memalign request. */
+       if (len == 0)
+               return;
+
+       /*
+        * Pick the chunk size in basic blocks and only then convert it
+        * to bytes, so a very large len is never turned into a byte
+        * count that might not fit in a uint.
+        */
+       chunk = (uint)BTOBB(BDSTRAT_SIZE);
+       if (len < chunk)
+               chunk = len;
+       size = (size_t)BBTOB(chunk);
+       if ((z = memalign(getpagesize(), size)) == NULL) {
+               fprintf(stderr, "%s: device_zero can't memalign %d bytes: %s\n",
+                       progname, (int)size, strerror(errno));
+               exit(1);
+       }
+       memset(z, 0, size);
+       fd = libxfs_device_to_fd(dev);
+       end = start + len;
+       for (bno = start; bno < end; bno += nblks) {
+               /* the final chunk may be shorter than the buffer */
+               nblks = chunk;
+               if (bno + nblks > end)
+                       nblks = (uint)(end - bno);
+               if (lseek64(fd, BBTOOFF64(bno), SEEK_SET) < 0) {
+                       fprintf(stderr, "%s: device_zero lseek64 failed: %s\n",
+                               progname, strerror(errno));
+                       exit(1);
+               }
+               /*
+                * write() returns a signed count.  Check for -1 before
+                * comparing with the unsigned byte count, otherwise the
+                * failure can be converted to a huge unsigned value and
+                * go unnoticed.
+                */
+               sts = write(fd, z, BBTOB(nblks));
+               if (sts < 0 || (size_t)sts < (size_t)BBTOB(nblks)) {
+                       fprintf(stderr, "%s: device_zero write failed: %s\n",
+                               progname, strerror(errno));
+                       exit(1);
+               }
+       }
+       free(z);
+}
+
+/*
+ * Reset the log area of a device to a clean, unmounted state.
+ *
+ * The length basic blocks starting at start are zeroed, then a log
+ * record header is written to the first block and an unmount
+ * operation to the second.
+ *
+ * Returns 0 on success, -EINVAL when device or fs_uuid is missing and
+ * -1 when a buffer cannot be obtained or written.
+ */
+int
+libxfs_log_clear(
+       dev_t           device,
+       xfs_daddr_t     start,
+       uint            length,
+       uuid_t          *fs_uuid,
+       int             fmt)
+{
+       xfs_buf_t               *bp;
+       xlog_rec_header_t       *rhead;
+       xlog_op_header_t        *ophead;
+       /* the data section must be 32 bit size aligned */
+       struct {
+               __uint16_t      magic;
+               __uint16_t      pad1;
+               __uint32_t      pad2;   /* may as well make it 64 bits */
+       } unmount = { XLOG_UNMOUNT_TYPE, 0, 0 };
+
+       if (!device || !fs_uuid)
+               return -EINVAL;
+
+       /* first zero the log */
+       libxfs_device_zero(device, start, length);
+
+       /* then write a log record header */
+       bp = libxfs_getbuf(device, start, 1);
+       if (bp == NULL)
+               return -1;
+
+       memset(XFS_BUF_PTR(bp), 0, BBSIZE);
+       rhead = (xlog_rec_header_t *)XFS_BUF_PTR(bp);
+
+       /*
+        * note that oh_tid actually contains the cycle number
+        * and the tid is stored in h_cycle_data[0] - that's the
+        * way things end up on disk.
+        */
+       INT_SET(rhead->h_magicno,       ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
+       INT_SET(rhead->h_cycle,         ARCH_CONVERT, 1);
+       INT_SET(rhead->h_version,       ARCH_CONVERT, 1);
+       INT_SET(rhead->h_len,           ARCH_CONVERT, 20);
+       INT_SET(rhead->h_chksum,        ARCH_CONVERT, 0);
+       INT_SET(rhead->h_prev_block,    ARCH_CONVERT, -1);
+       INT_SET(rhead->h_num_logops,    ARCH_CONVERT, 1);
+       INT_SET(rhead->h_cycle_data[0], ARCH_CONVERT, 0xb0c0d0d0);
+       INT_SET(rhead->h_fmt,           ARCH_CONVERT, fmt);
+
+       ASSIGN_ANY_LSN(rhead->h_lsn,            1, 0, ARCH_CONVERT);
+       ASSIGN_ANY_LSN(rhead->h_tail_lsn,       1, 0, ARCH_CONVERT);
+
+       memcpy(rhead->h_fs_uuid, fs_uuid, sizeof(uuid_t));
+
+       /* libxfs_writebuf releases the buffer whether or not it fails */
+       if (libxfs_writebuf(bp, 0))
+               return -1;
+
+       bp = libxfs_getbuf(device, start + 1, 1);
+       if (bp == NULL)
+               return -1;
+
+       /* now a log unmount op */
+       memset(XFS_BUF_PTR(bp), 0, BBSIZE);
+       ophead = (xlog_op_header_t *)XFS_BUF_PTR(bp);
+       INT_SET(ophead->oh_tid,         ARCH_CONVERT, 1);
+       INT_SET(ophead->oh_len,         ARCH_CONVERT, sizeof(unmount));
+       INT_SET(ophead->oh_clientid,    ARCH_CONVERT, XFS_LOG);
+       INT_SET(ophead->oh_flags,       ARCH_CONVERT, XLOG_UNMOUNT_TRANS);
+       INT_SET(ophead->oh_res2,        ARCH_CONVERT, 0);
+
+       /* and the data for this op, placed right behind its header */
+       memcpy(XFS_BUF_PTR(bp) + sizeof(xlog_op_header_t),
+               &unmount,
+               sizeof(unmount));
+
+       if (libxfs_writebuf(bp, 0))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Simple I/O interface
+ */
+
+/*
+ * Allocate a zeroed buffer header together with room for len basic
+ * blocks of data, and initialise it for the given device and block
+ * number.  The data area sits directly behind the header.
+ *
+ * The program exits if the allocation fails, so the visible code never
+ * returns NULL.  Release the buffer with libxfs_putbuf().
+ */
+xfs_buf_t *
+libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+{
+       xfs_buf_t       *buf;
+       size_t          total;
+
+       total = sizeof(xfs_buf_t) + BBTOB(len);
+       if ((buf = calloc(total, 1)) == NULL) {
+               /* total is a size_t; cast it so it matches the format */
+               fprintf(stderr, "%s: buf calloc failed (%lu bytes): %s\n",
+                       progname, (unsigned long)total, strerror(errno));
+               exit(1);
+       }
+       /* by default, we allocate buffer directly after the header */
+       buf->b_blkno = blkno;
+       buf->b_bcount = BBTOB(len);
+       buf->b_dev = device;
+       buf->b_addr = (char *)(&buf->b_addr + 1);       /* must be last field */
+#ifdef IO_DEBUG
+       fprintf(stderr, "getbuf allocated %ubytes, blkno=%llu(%llu), %p\n",
+               BBTOB(len), BBTOOFF64(blkno), blkno, buf);
+#endif
+
+       return(buf);
+}
+
+/*
+ * Read len basic blocks starting at blkno from dev into an existing
+ * buffer, which must be at least that large (asserted).  The buffer's
+ * device and block number are updated to match.
+ *
+ * Returns 0 on success or the errno value of the failing lseek64/read.
+ * When die is non-zero a failure terminates the program instead.
+ */
+int
+libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *buf, int len, int die)
+{
+       int     fd = libxfs_device_to_fd(dev);
+       int     error;
+
+       buf->b_dev = dev;
+       buf->b_blkno = blkno;
+       ASSERT(BBTOB(len) <= buf->b_bcount);
+
+       if (lseek64(fd, BBTOOFF64(blkno), SEEK_SET) < 0) {
+               /* save errno: the calls below are allowed to change it */
+               error = errno;
+               fprintf(stderr, "%s: lseek64 to %llu failed: %s\n",
+                       progname, (unsigned long long)BBTOOFF64(blkno),
+                       strerror(error));
+               ASSERT(0);
+               if (die)
+                       exit(1);
+               return error;
+       }
+       if (read(fd, buf->b_addr, BBTOB(len)) < 0) {
+               /* save errno: the calls below are allowed to change it */
+               error = errno;
+               fprintf(stderr, "%s: read failed: %s\n",
+                       progname, strerror(error));
+               if (die)
+                       exit(1);
+               return error;
+       }
+#ifdef IO_DEBUG
+       fprintf(stderr, "readbufr read %ubytes, blkno=%llu(%llu), %p\n",
+               BBTOB(len), BBTOOFF64(blkno), blkno, buf);
+#endif
+       return 0;
+}
+
+/*
+ * Get a fresh buffer for (dev, blkno, len) and fill it from disk.
+ * Returns the buffer, or NULL (after releasing it) if the read
+ * reported an error.
+ */
+xfs_buf_t *
+libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int die)
+{
+       xfs_buf_t       *bp = libxfs_getbuf(dev, blkno, len);
+
+       if (libxfs_readbufr(dev, blkno, bp, len, die) != 0) {
+               libxfs_putbuf(bp);
+               return NULL;
+       }
+       return bp;
+}
+
+/*
+ * Read one filesystem block at XFS_SB_DADDR from the mount's device
+ * and return the buffer (NULL if the read fails).
+ */
+xfs_buf_t *
+libxfs_getsb(xfs_mount_t *mp, int die)
+{
+       int     nbbs = XFS_FSB_TO_BB(mp, 1);
+
+       return libxfs_readbuf(mp->m_dev, XFS_SB_DADDR, nbbs, die);
+}
+
+/*
+ * Write the whole of a buffer (b_bcount bytes) to its device at the
+ * offset given by its block number.  The buffer is not released.
+ *
+ * Returns 0 on success, the errno value of a failing lseek64/write, or
+ * EIO when fewer bytes than requested were written.  When die is
+ * non-zero a failure terminates the program instead.
+ */
+int
+libxfs_writebuf_int(xfs_buf_t *buf, int die)
+{
+       int     sts;
+       int     error;
+       int     fd = libxfs_device_to_fd(buf->b_dev);
+
+       if (lseek64(fd, BBTOOFF64(buf->b_blkno), SEEK_SET) < 0) {
+               /* save errno: the calls below are allowed to change it */
+               error = errno;
+               fprintf(stderr, "%s: lseek64 to %llu failed: %s\n",
+                       progname, (unsigned long long)BBTOOFF64(buf->b_blkno),
+                       strerror(error));
+               ASSERT(0);
+               if (die)
+                       exit(1);
+               return error;
+       }
+#ifdef IO_DEBUG
+       fprintf(stderr, "writing %ubytes at blkno=%llu(%llu), %p\n",
+               buf->b_bcount, BBTOOFF64(buf->b_blkno), buf->b_blkno, buf);
+#endif
+       sts = write(fd, buf->b_addr, buf->b_bcount);
+       if (sts < 0) {
+               /* save errno: the calls below are allowed to change it */
+               error = errno;
+               fprintf(stderr, "%s: write failed: %s\n",
+                       progname, strerror(error));
+               ASSERT(0);
+               if (die)
+                       exit(1);
+               return error;
+       }
+       else if (sts != buf->b_bcount) {
+               /* short write: errno is not meaningful here */
+               fprintf(stderr, "%s: error - wrote only %d of %d bytes\n",
+                       progname, sts, buf->b_bcount);
+               if (die)
+                       exit(1);
+               return EIO;
+       }
+       return 0;
+}
+
+/*
+ * Write a buffer out and release it.  The buffer is released whether
+ * or not the write succeeded; the write's status is returned.
+ */
+int
+libxfs_writebuf(xfs_buf_t *buf, int die)
+{
+       int     status;
+
+       status = libxfs_writebuf_int(buf, die);
+       libxfs_putbuf(buf);
+       return status;
+}
+
+/*
+ * Release a buffer obtained from libxfs_getbuf().  Whatever the
+ * buffer's fsprivate field points at is returned to
+ * xfs_buf_item_zone first.  A NULL buffer is ignored.
+ */
+void
+libxfs_putbuf(xfs_buf_t *buf)
+{
+       extern xfs_zone_t       *xfs_buf_item_zone;
+       xfs_buf_log_item_t      *item;
+
+       if (buf == NULL)
+               return;
+
+       item = XFS_BUF_FSPRIVATE(buf, xfs_buf_log_item_t *);
+       if (item)
+               libxfs_zone_free(xfs_buf_item_zone, item);
+#ifdef IO_DEBUG
+       fprintf(stderr, "putbuf released %ubytes, %p\n",
+               buf->b_bcount, buf);
+#endif
+       free(buf);
+}
+
+
+/*
+ * Simple memory interface
+ */
+
+/*
+ * Create a zone descriptor for items of the given size.  The name
+ * pointer is stored as is, not copied.  Exits the program if the
+ * descriptor cannot be allocated.
+ */
+xfs_zone_t *
+libxfs_zone_init(int size, char *name)
+{
+       xfs_zone_t      *ptr;
+
+       if ((ptr = malloc(sizeof(xfs_zone_t))) == NULL) {
+               /* sizeof yields a size_t; cast it to match %d */
+               fprintf(stderr, "%s: zone init failed (%s, %d bytes): %s\n",
+                       progname, name, (int)sizeof(xfs_zone_t),
+                       strerror(errno));
+               exit(1);
+       }
+       ptr->zone_unitsize = size;
+       ptr->zone_name = name;
+#ifdef MEM_DEBUG
+       ptr->allocated = 0;
+       fprintf(stderr, "new zone %p for \"%s\", size=%d\n", ptr, name, size);
+#endif
+       return ptr;
+}
+
+/*
+ * Allocate one zero-filled item of the zone's unit size.  Exits the
+ * program if the allocation fails.
+ */
+void *
+libxfs_zone_zalloc(xfs_zone_t *z)
+{
+       void    *item;
+
+       ASSERT(z != NULL);
+       item = calloc(z->zone_unitsize, 1);
+       if (item == NULL) {
+               fprintf(stderr, "%s: zone calloc failed (%s, %d bytes): %s\n",
+                       progname, z->zone_name, z->zone_unitsize,
+                       strerror(errno));
+               exit(1);
+       }
+#ifdef MEM_DEBUG
+       z->allocated++;
+       fprintf(stderr, "## zone alloc'd item %p from %s (%d bytes) (%d active)\n",
+               item, z->zone_name, z->zone_unitsize, z->allocated);
+#endif
+       return item;
+}
+
+/*
+ * Return an item to its zone, which here just frees it.  A NULL item
+ * is accepted.
+ */
+void
+libxfs_zone_free(xfs_zone_t *z, void *ptr)
+{
+#ifdef MEM_DEBUG
+       z->allocated--;
+       fprintf(stderr, "## zone freed item %p from %s (%d bytes) (%d active)\n",
+               ptr, z->zone_name, z->zone_unitsize, z->allocated);
+#endif
+       /* free(NULL) is a no-op, so no guard is required */
+       free(ptr);
+}
+
+/*
+ * malloc() wrapper that never returns NULL: the program exits with a
+ * message if the allocation fails.
+ */
+void *
+libxfs_malloc(size_t size)
+{
+       void    *ptr;
+
+       if ((ptr = malloc(size)) == NULL) {
+               /* size is a size_t; cast it so it matches the format */
+               fprintf(stderr, "%s: malloc failed (%lu bytes): %s\n",
+                       progname, (unsigned long)size, strerror(errno));
+               exit(1);
+       }
+#ifdef MEM_DEBUG
+       fprintf(stderr, "## malloc'd item %p size %lu bytes\n",
+               ptr, (unsigned long)size);
+#endif
+       return ptr;
+}
+
+/*
+ * free() wrapper; a NULL pointer is accepted.
+ */
+void
+libxfs_free(void *ptr)
+{
+#ifdef MEM_DEBUG
+       fprintf(stderr, "## freed item %p\n",
+               ptr);
+#endif
+       /* free(NULL) is a no-op, so no guard is required */
+       free(ptr);
+}
+
+/*
+ * realloc() wrapper: the program exits with a message whenever
+ * realloc() returns NULL, so callers always get a usable pointer back.
+ */
+void *
+libxfs_realloc(void *ptr, size_t size)
+{
+#ifdef MEM_DEBUG
+       void    *optr = ptr;
+#endif
+       if ((ptr = realloc(ptr, size)) == NULL) {
+               /* size is a size_t; cast it so it matches the format */
+               fprintf(stderr, "%s: realloc failed (%lu bytes): %s\n",
+                       progname, (unsigned long)size, strerror(errno));
+               exit(1);
+       }
+#ifdef MEM_DEBUG
+       fprintf(stderr, "## realloc'd item %p now %p size %lu bytes\n",
+               optr, ptr, (unsigned long)size);
+#endif
+       return ptr;
+}
+
+
+/*
+ * Read inode ino via libxfs_iread() and hand it back through ipp.
+ * lock_flags is not used.  Returns 0 on success; on error the error
+ * from libxfs_iread() is returned and *ipp is left untouched.
+ */
+int
+libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
+               xfs_inode_t **ipp, xfs_daddr_t bno)
+{
+       xfs_inode_t     *inode;
+       int             status;
+
+       status = libxfs_iread(mp, tp, ino, &inode, bno);
+       if (status == 0)
+               *ipp = inode;
+       return status;
+}
+
+/*
+ * Release an in-core inode: free its attached inode log item, if any,
+ * and then the inode itself.  lock_flags is not used and a NULL inode
+ * is ignored.
+ */
+void
+libxfs_iput(xfs_inode_t *ip, uint lock_flags)
+{
+       extern xfs_zone_t       *xfs_ili_zone;
+       extern xfs_zone_t       *xfs_inode_zone;
+
+       if (ip == NULL)
+               return;
+
+       /* free attached inode log item */
+       if (ip->i_itemp)
+               libxfs_zone_free(xfs_ili_zone, ip->i_itemp);
+       ip->i_itemp = NULL;
+
+       libxfs_zone_free(xfs_inode_zone, ip);
+}
+
+/*
+ * libxfs_mod_sb can be used to copy arbitrary changes to the
+ * in-core superblock into the superblock buffer to be logged.
+ *
+ * In user-space, we simply convert to big-endian, and write the
+ * whole superblock - the in-core changes have all been made
+ * already.  The fields argument is therefore not consulted:
+ * XFS_SB_ALL_BITS is always translated.  A write failure terminates
+ * the program (libxfs_writebuf is called with die set).
+ */
+void
+libxfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
+{
+       xfs_buf_t       *bp;
+       xfs_mount_t     *mp;
+
+       mp = tp->t_mountp;
+       bp = libxfs_getbuf(mp->m_dev, XFS_SB_DADDR, 1);
+       libxfs_xlate_sb(XFS_BUF_PTR(bp), &mp->m_sb, -1, ARCH_CONVERT,
+                       XFS_SB_ALL_BITS);
+       libxfs_writebuf(bp, 1);
+}
diff --git a/libxfs/trans.c b/libxfs/trans.c
new file mode 100644 (file)
index 0000000..980d69a
--- /dev/null
@@ -0,0 +1,754 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Simple transaction interface
+ */
+
+/*
+ * Allocate a zeroed transaction for the given mount and type, with
+ * its embedded log item chunk initialised and all slots free.  Exits
+ * the program if the allocation fails.
+ */
+xfs_trans_t *
+libxfs_trans_alloc(xfs_mount_t *mp, int type)
+{
+       xfs_trans_t     *ptr;
+
+       if ((ptr = calloc(sizeof(xfs_trans_t), 1)) == NULL) {
+               /* sizeof yields a size_t; cast it to match %d */
+               fprintf(stderr, "%s: xact calloc failed (%d bytes): %s\n",
+                       progname, (int)sizeof(xfs_trans_t), strerror(errno));
+               exit(1);
+       }
+       ptr->t_mountp = mp;
+       ptr->t_type = type;
+       ptr->t_items_free = XFS_LIC_NUM_SLOTS;
+       XFS_LIC_INIT(&(ptr->t_items));
+#ifdef XACT_DEBUG
+       fprintf(stderr, "allocated new transaction %p\n", ptr);
+#endif
+       return ptr;
+}
+
+/*
+ * Create a new, empty transaction with the same mount and type as tp.
+ * No items are carried over from tp.
+ */
+xfs_trans_t *
+libxfs_trans_dup(xfs_trans_t *tp)
+{
+       xfs_trans_t     *ntp = libxfs_trans_alloc(tp->t_mountp, tp->t_type);
+
+#ifdef XACT_DEBUG
+       fprintf(stderr, "duplicated transaction %p (new=%p)\n", tp, ntp);
+#endif
+       return ntp;
+}
+
+/*
+ * Check that the requested number of disk blocks is available.
+ *
+ * Nothing is actually reserved here: the in-core free block count is
+ * only compared against blocks, never decremented.  The log and
+ * realtime arguments exist to preserve the API and are ignored.
+ *
+ * Returns ENOSPC when blocks exceeds sb_fdblocks, 0 otherwise.
+ */
+int
+libxfs_trans_reserve(xfs_trans_t *tp,
+       uint blocks, uint logspace, uint rtextents, uint flags, uint logcount)
+{
+       xfs_sb_t        *sb = &tp->t_mountp->m_sb;
+
+       if (blocks > 0 && sb->sb_fdblocks < blocks)
+               return ENOSPC;
+
+       /* user space, don't need log/RT stuff (preserve the API though) */
+       return 0;
+}
+
+/*
+ * Abandon a transaction: release its items with xfs_trans_free_items()
+ * and free the transaction itself.  A NULL transaction is ignored.
+ */
+void
+libxfs_trans_cancel(xfs_trans_t *tp, int flags)
+{
+#ifdef XACT_DEBUG
+       /* keep the address for the message printed after the free */
+       xfs_trans_t     *otp = tp;
+#endif
+
+       if (tp != NULL) {
+               xfs_trans_free_items(tp, flags);
+               free(tp);
+       }
+#ifdef XACT_DEBUG
+       fprintf(stderr, "## cancelled transaction %p\n", otp);
+#endif
+}
+
+/*
+ * Read inode ino and, when a transaction is given, add the inode's
+ * log item to it (creating the log item first if necessary).
+ * lock_flags is not used.
+ *
+ * Without a transaction this is a plain libxfs_iread().  Returns 0 on
+ * success or the error from libxfs_iread().
+ */
+int
+libxfs_trans_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino,
+               uint lock_flags, xfs_inode_t **ipp)
+{
+       xfs_inode_t     *inode;
+       int             status;
+
+       if (tp == NULL)
+               return libxfs_iread(mp, NULL, ino, ipp, 0);
+
+       status = libxfs_iread(mp, tp, ino, &inode, 0);
+       if (status)
+               return status;
+       ASSERT(inode != NULL);
+
+       if (inode->i_itemp == NULL)
+               xfs_inode_item_init(inode, mp);
+       xfs_trans_add_item(tp, (xfs_log_item_t *)(inode->i_itemp));
+
+       /* initialize i_transp so we can find it incore */
+       inode->i_transp = tp;
+
+       *ipp = inode;
+       return 0;
+}
+
+/*
+ * Drop an inode from a transaction and release it.  The inode must
+ * belong to tp and must not have been dirtied in it (both asserted).
+ * Without a transaction this is a plain libxfs_iput().
+ */
+void
+libxfs_trans_iput(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags)
+{
+       xfs_log_item_t          *item;
+       xfs_log_item_desc_t     *desc;
+
+       if (tp == NULL) {
+               libxfs_iput(ip, lock_flags);
+               return;
+       }
+
+       ASSERT(ip->i_transp == tp);
+       ASSERT(ip->i_itemp != NULL);
+       item = (xfs_log_item_t *)ip->i_itemp;
+
+       /* detach the inode's log item from the transaction */
+       desc = xfs_trans_find_item(tp, item);
+       ASSERT(desc != NULL);
+       ASSERT(desc->lid_item == item);
+       ASSERT(!(desc->lid_flags & XFS_LID_DIRTY));
+       xfs_trans_free_item(tp, desc);
+
+       libxfs_iput(ip, lock_flags);
+}
+
+/*
+ * Add an inode that is not yet part of any transaction (asserted) to
+ * tp, creating its inode log item first if it has none.  lock_flags
+ * is not used.
+ */
+void
+libxfs_trans_ijoin(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags)
+{
+       xfs_inode_log_item_t    *item;
+
+       ASSERT(ip->i_transp == NULL);
+       if (ip->i_itemp == NULL)
+               xfs_inode_item_init(ip, ip->i_mount);
+       item = ip->i_itemp;
+       ASSERT(item->ili_flags == 0);
+       ASSERT(item->ili_inode != NULL);
+
+       xfs_trans_add_item(tp, (xfs_log_item_t *)(item));
+
+       /* remember the owning transaction on the inode */
+       ip->i_transp = tp;
+#ifdef XACT_DEBUG
+       fprintf(stderr, "ijoin'd inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+}
+
+/*
+ * Set XFS_ILI_HOLD on the log item of an inode that already belongs
+ * to transaction tp (both the membership and the presence of the log
+ * item are asserted).
+ */
+void
+libxfs_trans_ihold(xfs_trans_t *tp, xfs_inode_t *ip)
+{
+       ASSERT(ip->i_transp == tp);
+       ASSERT(ip->i_itemp != NULL);
+
+       ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
+#ifdef XACT_DEBUG
+       fprintf(stderr, "ihold'd inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+}
+
+/*
+ * Set XFS_BLI_INODE_ALLOC_BUF on the buf log item of a buffer that
+ * already belongs to transaction tp (asserted via the buffer's
+ * fsprivate2 and fsprivate fields).
+ */
+void
+libxfs_trans_inode_alloc_buf(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+       xfs_buf_log_item_t      *bip;
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+}
+
+/*
+ * Mark the inode fields named in flags as needing to be logged when
+ * the transaction commits.  The inode must already be associated with
+ * the given transaction (asserted).
+ *
+ * Both the transaction and the inode's log item descriptor are marked
+ * dirty, and flags is OR'ed into the log format's ilf_fields.
+ */
+void
+xfs_trans_log_inode(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            flags)
+{
+       xfs_inode_log_item_t    *iip;
+       xfs_log_item_desc_t     *desc;
+
+       ASSERT(ip->i_transp == tp);
+       ASSERT(ip->i_itemp != NULL);
+#ifdef XACT_DEBUG
+       fprintf(stderr, "dirtied inode %llu, transaction %p\n", ip->i_ino, tp);
+#endif
+
+       iip = ip->i_itemp;
+       desc = xfs_trans_find_item(tp, (xfs_log_item_t*)(iip));
+       ASSERT(desc != NULL);
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       desc->lid_flags |= XFS_LID_DIRTY;
+
+       /*
+        * Always OR in the bits from the ili_last_fields field.
+        * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
+        * routines in the eventual clearing of the ilf_fields bits.
+        * See the big comment in xfs_iflush() for an explanation of
+        * this coordination mechanism.
+        */
+       iip->ili_format.ilf_fields |= flags | iip->ili_last_fields;
+}
+
+/*
+ * Mark the given buffer as needing to be logged when the transaction
+ * is committed.  The buffer must already be associated with the given
+ * transaction (asserted).
+ *
+ * First and last are byte offsets relative to the beginning of the
+ * buffer and must satisfy first <= last < XFS_BUF_COUNT(bp); they are
+ * passed on to xfs_buf_item_log().
+ */
+void
+libxfs_trans_log_buf(xfs_trans_t *tp, xfs_buf_t *bp, uint first, uint last)
+{
+       xfs_buf_log_item_t      *item;
+       xfs_log_item_desc_t     *desc;
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+       ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
+#ifdef XACT_DEBUG
+       fprintf(stderr, "dirtied buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       desc = xfs_trans_find_item(tp, (xfs_log_item_t *)item);
+       ASSERT(desc != NULL);
+
+       /* dirty the transaction, the descriptor and the item itself */
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       desc->lid_flags |= XFS_LID_DIRTY;
+       xfs_buf_item_log(item, first, last);
+}
+
+/*
+ * Release a buffer, taking its transaction membership into account.
+ *
+ * Without a transaction the buffer is simply put.  Otherwise a
+ * recursive hold (bli_recur) is dropped first; a buffer that is dirty
+ * in the transaction stays attached; and only a clean, non-recursive
+ * buffer is detached from the transaction and put.
+ */
+void
+libxfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+       xfs_buf_log_item_t      *item;
+       xfs_log_item_desc_t     *desc;
+#ifdef XACT_DEBUG
+       fprintf(stderr, "released buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       if (tp == NULL) {
+               ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+               libxfs_putbuf(bp);
+               return;
+       }
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       ASSERT(item->bli_item.li_type == XFS_LI_BUF);
+       desc = xfs_trans_find_item(tp, (xfs_log_item_t*)item);
+       ASSERT(desc != NULL);
+
+       /* one fewer nested reference; the buffer stays in the xact */
+       if (item->bli_recur > 0) {
+               item->bli_recur--;
+               return;
+       }
+
+       /* If dirty, can't release till transaction committed */
+       if (desc->lid_flags & XFS_LID_DIRTY)
+               return;
+
+       xfs_trans_free_item(tp, desc);
+       item->bli_flags &= ~XFS_BLI_HOLD;
+       XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+       libxfs_putbuf(bp);
+}
+
+/*
+ * Invalidate a buffer that belongs to transaction tp (asserted): its
+ * buf log item loses XFS_BLI_DIRTY and XFS_BLI_INODE_BUF and gains
+ * XFS_BLI_CANCEL, and both the descriptor and the transaction are
+ * marked dirty.
+ */
+void
+libxfs_trans_binval(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+       xfs_buf_log_item_t      *item;
+       xfs_log_item_desc_t     *desc;
+#ifdef XACT_DEBUG
+       fprintf(stderr, "binval'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+       item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       desc = xfs_trans_find_item(tp, (xfs_log_item_t*)item);
+       ASSERT(desc != NULL);
+
+       /* update the item's own flags and its log format flags */
+       item->bli_flags &= ~(XFS_BLI_DIRTY);
+       item->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
+       item->bli_format.blf_flags |= XFS_BLI_CANCEL;
+
+       desc->lid_flags |= XFS_LID_DIRTY;
+       tp->t_flags |= XFS_TRANS_DIRTY;
+}
+
+/*
+ * Add a buffer that is not part of any transaction (asserted) to tp.
+ * A buf log item is set up for the buffer if needed, added to the
+ * transaction, and the buffer's fsprivate2 field is pointed at tp.
+ */
+void
+libxfs_trans_bjoin(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+       xfs_buf_log_item_t      *item;
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+#ifdef XACT_DEBUG
+       fprintf(stderr, "bjoin'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       xfs_buf_item_init(bp, tp->t_mountp);
+       item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       xfs_trans_add_item(tp, (xfs_log_item_t *)item);
+
+       /* record the owning transaction on the buffer */
+       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+}
+
+/*
+ * Set XFS_BLI_HOLD on the buf log item of a buffer that already
+ * belongs to transaction tp (asserted via the buffer's fsprivate2 and
+ * fsprivate fields).
+ */
+void
+libxfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp)
+{
+       xfs_buf_log_item_t      *bip;
+
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+#ifdef XACT_DEBUG
+       fprintf(stderr, "bhold'd buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       bip->bli_flags |= XFS_BLI_HOLD;
+}
+
+/*
+ * Get a buffer for (dev, d, len) on behalf of a transaction.
+ *
+ * Without a transaction this is a plain libxfs_getbuf().  If the
+ * transaction already holds a matching buffer, its recursion count is
+ * bumped and that buffer is returned.  Otherwise a new buffer is
+ * obtained, given a buf log item and added to the transaction.  The f
+ * argument is not used.
+ */
+xfs_buf_t *
+libxfs_trans_get_buf(xfs_trans_t *tp, dev_t dev, xfs_daddr_t d, int len, uint f)
+{
+       xfs_buf_t               *bp;
+       xfs_buf_log_item_t      *item;
+       buftarg_t               bdev = { dev };
+
+       if (tp == NULL)
+               return libxfs_getbuf(dev, d, len);
+
+       /* a single chunk can use the fast search, otherwise scan them all */
+       bp = (tp->t_items.lic_next == NULL) ?
+               xfs_trans_buf_item_match(tp, &bdev, d, len) :
+               xfs_trans_buf_item_match_all(tp, &bdev, d, len);
+       if (bp != NULL) {
+               ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+               item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+               ASSERT(item != NULL);
+               item->bli_recur++;
+               return bp;
+       }
+
+       bp = libxfs_getbuf(dev, d, len);
+       if (bp == NULL)
+               return NULL;
+#ifdef XACT_DEBUG
+       fprintf(stderr, "trans_get_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       xfs_buf_item_init(bp, tp->t_mountp);
+       item = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+       item->bli_recur = 0;
+       xfs_trans_add_item(tp, (xfs_log_item_t *)item);
+
+       /* initialize b_fsprivate2 so we can find it incore */
+       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       return bp;
+}
+
+int
+libxfs_trans_read_buf(xfs_mount_t *mp, xfs_trans_t *tp, dev_t dev,
+                       xfs_daddr_t blkno, int len, uint f, xfs_buf_t **bpp)
+{
+       xfs_buf_t               *bp;
+       xfs_buf_log_item_t      *bip;
+       int                     error;
+       buftarg_t               bdev = { dev };
+
+       if (tp == NULL) {
+               bp = libxfs_getbuf(mp->m_dev, blkno, len);
+               error = libxfs_readbufr(dev, blkno, bp, len, 0);
+               *bpp = bp;
+               return error;
+       }
+
+       if (tp->t_items.lic_next == NULL)
+               bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
+       else
+               bp = xfs_trans_buf_item_match_all(tp, &bdev, blkno, len);
+       if (bp != NULL) {
+               ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+               ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+               bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+               bip->bli_recur++;
+               *bpp = bp;
+               return 0;
+       }
+
+       bp = libxfs_getbuf(mp->m_dev, blkno, len);
+       error = libxfs_readbufr(dev, blkno, bp, len, 0);
+       if (error) {
+               *bpp = NULL;
+               return error;
+       }
+#ifdef XACT_DEBUG
+       fprintf(stderr, "trans_read_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+       xfs_buf_item_init(bp, tp->t_mountp);
+       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       bip->bli_recur = 0;
+       xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
+
+       /* initialise b_fsprivate2 so we can find it incore */
+       XFS_BUF_SET_FSPRIVATE2(bp, tp);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Note a pending change to one of the superblock counters.
+ *
+ * The delta is only accumulated in the transaction structure here; the
+ * transaction is flagged so that the superblock is updated when it
+ * commits.  XFS_TRANS_SB_RES_FDBLOCKS requests are ignored, and an
+ * unknown field trips an assertion and changes nothing.
+ *
+ * Originally derived from xfs_trans_mod_sb().
+ */
+void
+libxfs_trans_mod_sb(xfs_trans_t *tp, uint field, long delta)
+{
+       if (field == XFS_TRANS_SB_RES_FDBLOCKS)
+               return;
+
+       if (field == XFS_TRANS_SB_FDBLOCKS) {
+               tp->t_fdblocks_delta += delta;
+       } else if (field == XFS_TRANS_SB_ICOUNT) {
+               ASSERT(delta > 0);
+               tp->t_icount_delta += delta;
+       } else if (field == XFS_TRANS_SB_IFREE) {
+               tp->t_ifree_delta += delta;
+       } else if (field == XFS_TRANS_SB_FREXTENTS) {
+               tp->t_frextents_delta += delta;
+       } else {
+               ASSERT(0);
+               return;
+       }
+
+       /* a counter changed: both the superblock and the transaction are dirty */
+       tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY);
+}
+
+
+/*
+ * Transaction commit code follows (i.e. write to disk in libxfs)
+ */
+
+/*
+ * Commit-time processing for one inode log item.
+ *
+ * If none of the XFS_ILOG_ALL fields are set, the inode is simply
+ * detached from the transaction.  Otherwise the buffer holding the
+ * on-disk inode is fetched, the in-core inode is flushed into it and
+ * the buffer is written.  Unless the item carries XFS_ILI_HOLD, the
+ * log item is freed and ip->i_itemp is cleared on the way out.
+ * Failures from libxfs_itobp()/libxfs_iflush_int() are only reported
+ * as warnings on stderr.
+ */
+STATIC void
+inode_item_done(xfs_inode_log_item_t *iip)
+{
+       xfs_dinode_t    *dip;
+       xfs_inode_t     *ip;
+       xfs_mount_t     *mp;
+       xfs_buf_t       *bp;
+       int             hold;
+       int             error;
+       extern xfs_zone_t *xfs_ili_zone;
+
+       ip = iip->ili_inode;
+       mp = iip->ili_item.li_mountp;
+       /* remember the hold state now; ili_flags may be reset below */
+       hold = iip->ili_flags & XFS_ILI_HOLD;
+       ASSERT(ip != NULL);
+
+       /* nothing was logged for this inode: no flush is needed */
+       if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) {
+               ip->i_transp = NULL;    /* disassociate from transaction */
+               iip->ili_flags = 0;     /* reset all flags */
+               if (!hold)
+                       goto ili_done;
+               /* held: the log item stays attached to the inode */
+               return;
+       }
+
+       /*
+        * Get the buffer containing the on-disk inode.
+        */
+       error = libxfs_itobp(mp, NULL, ip, &dip, &bp, 0);
+       if (error) {
+               fprintf(stderr, "%s: warning - itobp failed (%d)\n",
+                       progname, error);
+               goto ili_done;
+       }
+
+       /* libxfs_iflush_int() asserts that the buffer's fsprivate is set */
+       XFS_BUF_SET_FSPRIVATE(bp, iip);
+       error = libxfs_iflush_int(ip, bp);
+       if (error) {
+               fprintf(stderr, "%s: warning - iflush_int failed (%d)\n",
+                       progname, error);
+               /*
+                * NOTE(review): bp is not released on this path and its
+                * fsprivate still points at the item freed below - confirm
+                * whether that is intended.
+                */
+               goto ili_done;
+       }
+
+       ip->i_transp = NULL;    /* disassociate from transaction */
+       XFS_BUF_SET_FSPRIVATE(bp, NULL);        /* remove log item */
+       XFS_BUF_SET_FSPRIVATE2(bp, NULL);       /* remove xact ptr */
+       libxfs_writebuf_int(bp, 0);
+#ifdef XACT_DEBUG
+       fprintf(stderr, "flushing dirty inode %llu, buffer %p (hold=%u)\n",
+                       ip->i_ino, bp, hold);
+#endif
+       if (hold) {
+               /*
+                * Held inode: keep the log item and just drop the flag.
+                * NOTE(review): bp is not passed to libxfs_putbuf() on this
+                * path - confirm who releases it.
+                */
+               iip->ili_flags &= ~XFS_ILI_HOLD;
+               return;
+       }
+       else {
+               /*libxfs_iput(iip->ili_inode, 0);       - nathans TODO? */
+               libxfs_putbuf(bp);
+       }
+
+ili_done:
+       /* free the log item and detach it from the in-core inode */
+       if (ip->i_itemp)
+               kmem_zone_free(xfs_ili_zone, ip->i_itemp);
+       else
+               ASSERT(0);
+       ip->i_itemp = NULL;
+}
+
+STATIC void
+buf_item_done(xfs_buf_log_item_t *bip)
+{
+       extern xfs_zone_t *xfs_buf_item_zone;
+       xfs_buf_t       *bp;
+       int             hold;
+
+       bp = bip->bli_buf;
+       ASSERT(bp != NULL);
+       XFS_BUF_SET_FSPRIVATE(bp, NULL);        /* remove log item */
+       XFS_BUF_SET_FSPRIVATE2(bp, NULL);       /* remove xact ptr */
+
+       hold = (bip->bli_flags & XFS_BLI_HOLD);
+       if (bip->bli_flags & XFS_BLI_DIRTY) {
+#ifdef XACT_DEBUG
+               fprintf(stderr, "flushing dirty buffer %p (hold=%d)\n",
+                       bp, hold);
+#endif
+               libxfs_writebuf_int(bp, 0);
+               if (hold)
+                       bip->bli_flags &= ~XFS_BLI_HOLD;
+               else
+                       libxfs_putbuf(bp);
+       }
+       /* release the buf item */
+       kmem_zone_free(xfs_buf_item_zone, bip);
+}
+
+/*
+ * Run the commit-time handler for every in-use descriptor of one
+ * log item chunk.  Buffer and inode items are the only types handled;
+ * anything else is reported on stderr and trips an assertion.
+ */
+static void
+trans_chunk_committed(xfs_log_item_chunk_t *licp)
+{
+       xfs_log_item_desc_t     *desc = licp->lic_descs;
+       xfs_log_item_t          *item;
+       int                     slot;
+
+       for (slot = 0; slot < licp->lic_unused; slot++, desc++) {
+               if (XFS_LIC_ISFREE(licp, slot))
+                       continue;
+
+               item = desc->lid_item;
+               switch (item->li_type) {
+               case XFS_LI_BUF:
+                       buf_item_done((xfs_buf_log_item_t *)item);
+                       break;
+               case XFS_LI_INODE:
+                       inode_item_done((xfs_inode_log_item_t *)item);
+                       break;
+               default:
+                       fprintf(stderr, "%s: unrecognised log item type\n",
+                               progname);
+                       ASSERT(0);
+                       break;
+               }
+       }
+}
+
+/*
+ * Commit every log item of the transaction, chunk by chunk, using
+ * trans_chunk_committed().  Chained chunks are freed once processed;
+ * the first chunk is embedded in the transaction and is not freed here.
+ */
+static void
+trans_committed(xfs_trans_t *tp)
+{
+       xfs_log_item_chunk_t    *chunk;
+       xfs_log_item_chunk_t    *following;
+
+       /* the embedded chunk is only walked if it holds anything */
+       if (!(XFS_LIC_ARE_ALL_FREE(&(tp->t_items))))
+               trans_chunk_committed(&(tp->t_items));
+
+       /* chained chunks: process each one, then free it */
+       for (chunk = tp->t_items.lic_next; chunk != NULL; chunk = following) {
+               trans_chunk_committed(chunk);
+               following = chunk->lic_next;
+               kmem_free(chunk, sizeof(xfs_log_item_chunk_t));
+       }
+}
+
+/*
+ * Detach every in-use item of a chunk from its transaction.
+ *
+ * Each item loses its descriptor pointer, its link to the transaction
+ * and its hold flag.  When freeing_chunk is zero, descriptors of items
+ * that are not dirty (or all of them, if abort is set) are released
+ * as well.  Returns the number of descriptors released.  commit_lsn
+ * is unused.
+ *
+ * Originally based on xfs_trans_unlock_chunk(), adapted for libxfs
+ * transactions.
+ */
+int
+xfs_trans_unlock_chunk(
+       xfs_log_item_chunk_t    *licp,
+       int                     freeing_chunk,
+       int                     abort,
+       xfs_lsn_t               commit_lsn)     /* nb: unused */
+{
+       xfs_log_item_desc_t     *desc = licp->lic_descs;
+       xfs_log_item_t          *item;
+       int                     released = 0;
+       int                     slot;
+
+       for (slot = 0; slot < licp->lic_unused; slot++, desc++) {
+               if (XFS_LIC_ISFREE(licp, slot))
+                       continue;
+
+               item = desc->lid_item;
+               item->li_desc = NULL;
+
+               /* cut the item's ties to this transaction */
+               switch (item->li_type) {
+               case XFS_LI_BUF: {
+                       xfs_buf_log_item_t      *buf_item;
+
+                       buf_item = (xfs_buf_log_item_t *)item;
+                       XFS_BUF_SET_FSPRIVATE2(buf_item->bli_buf, NULL);
+                       buf_item->bli_flags &= ~XFS_BLI_HOLD;
+                       break;
+               }
+               case XFS_LI_INODE: {
+                       xfs_inode_log_item_t    *ino_item;
+
+                       ino_item = (xfs_inode_log_item_t *)item;
+                       ino_item->ili_inode->i_transp = NULL;
+                       ino_item->ili_flags &= ~XFS_ILI_HOLD;
+                       break;
+               }
+               default:
+                       fprintf(stderr, "%s: unrecognised log item type\n",
+                               progname);
+                       ASSERT(0);
+                       break;
+               }
+
+               /*
+                * When the caller is about to free the whole chunk there
+                * is no point releasing individual descriptors.
+                */
+               if (freeing_chunk)
+                       continue;
+               if (abort || !(desc->lid_flags & XFS_LID_DIRTY)) {
+                       XFS_LIC_RELSE(licp, slot);
+                       released++;
+               }
+       }
+
+       return (released);
+}
+
+
+/*
+ * Commit the changes represented by this transaction
+ */
+int
+libxfs_trans_commit(xfs_trans_t *tp, uint flags, xfs_lsn_t *commit_lsn_p)
+{
+       xfs_sb_t        *sbp;
+       int             error;
+
+       if (tp == NULL)
+               return 0;
+
+       if (!(tp->t_flags & XFS_TRANS_DIRTY)) {
+#ifdef XACT_DEBUG
+               fprintf(stderr, "committed clean transaction %p\n", tp);
+#endif
+               xfs_trans_free_items(tp, flags);
+               free(tp);
+               tp = NULL;
+               return 0;
+       }
+
+       if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+               sbp = &(tp->t_mountp->m_sb);
+               if (tp->t_icount_delta)
+                       sbp->sb_icount += tp->t_icount_delta;
+               if (tp->t_ifree_delta)
+                       sbp->sb_ifree += tp->t_ifree_delta;
+               if (tp->t_fdblocks_delta)
+                       sbp->sb_fdblocks += tp->t_fdblocks_delta;
+               if (tp->t_frextents_delta)
+                       sbp->sb_frextents += tp->t_frextents_delta;
+               libxfs_mod_sb(tp, XFS_SB_ALL_BITS);
+       }
+
+#ifdef XACT_DEBUG
+       fprintf(stderr, "committing dirty transaction %p\n", tp);
+#endif
+       trans_committed(tp);
+
+       /* That's it for the transaction structure.  Free it. */
+       free(tp);
+       tp = NULL;
+       return 0;
+}
diff --git a/libxfs/util.c b/libxfs/util.c
new file mode 100644 (file)
index 0000000..44222c3
--- /dev/null
@@ -0,0 +1,735 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+#include <time.h>
+
+/*
+ * Wrapper around libxfs_ialloc() that takes care of committing the
+ * current transaction and continuing in a new one when the allocator
+ * asks to be called again.
+ *
+ * tp    - in/out: current transaction; replaced by the duplicate when
+ *         the allocation had to be split across two transactions
+ * pip   - parent inode, passed through to libxfs_ialloc()
+ * mode, nlink, rdev, cr - attributes for the new inode
+ * ipp   - receives the new inode, or NULL on failure to allocate one
+ *
+ * Returns 0 on success, the libxfs_ialloc() error, or ENOSPC when no
+ * inode could be allocated.  Exits the program if the follow-on
+ * transaction reservation fails.
+ *
+ * Originally there were two copies of this code - one in mkfs, the
+ * other in repair - now there is just the one.
+ */
+int
+libxfs_inode_alloc(
+       xfs_trans_t     **tp,
+       xfs_inode_t     *pip,
+       mode_t          mode,
+       ushort          nlink,
+       dev_t           rdev,
+       cred_t          *cr,
+       xfs_inode_t     **ipp)
+{
+       boolean_t       call_again;
+       int             i;
+       xfs_buf_t       *ialloc_context;
+       xfs_inode_t     *ip;
+       xfs_trans_t     *ntp;
+       int             error;
+
+       call_again = B_FALSE;
+       ialloc_context = (xfs_buf_t *)0;
+       error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, (xfs_prid_t) 0,
+                          1, &ialloc_context, &call_again, &ip);
+       if (error) {
+               return error;
+       }
+       if (call_again) {
+               /*
+                * Hold the allocation buffer across the commit so it can
+                * be joined to the follow-on transaction.
+                */
+               xfs_trans_bhold(*tp, ialloc_context);
+               ntp = xfs_trans_dup(*tp);
+               xfs_trans_commit(*tp, 0, NULL);
+               *tp = ntp;
+               if ((i = xfs_trans_reserve(*tp, 0, 0, 0, 0, 0))) {
+                       /* the failure reason is the return value, not errno */
+                       fprintf(stderr, "%s: cannot reserve space: %s\n",
+                               progname, strerror(i));
+                       exit(1);
+               }
+               xfs_trans_bjoin(*tp, ialloc_context);
+               error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
+                                  (xfs_prid_t) 0, 1, &ialloc_context,
+                                  &call_again, &ip);
+               if (error) {
+                       return error;
+               }
+       }
+
+       /*
+        * libxfs_ialloc() returns 0 with a NULL inode when nothing could
+        * be allocated; report that instead of handing back a NULL inode
+        * together with a success code.
+        */
+       if (ip == NULL)
+               error = XFS_ERROR(ENOSPC);
+       *ipp = ip;
+       return error;
+}
+
+/*
+ * Stamp the current time of day into the inode timestamps selected
+ * by flags (any mix of XFS_ICHGTIME_MOD, _ACC and _CHG).
+ *
+ * This was once shared with the kernel, but has diverged to the point
+ * where it is no longer worth the hassle of maintaining common code.
+ */
+void
+libxfs_ichgtime(xfs_inode_t *ip, int flags)
+{
+       struct timeval  now;
+       struct timespec stamp;
+       __int32_t       sec;
+       __int32_t       nsec;
+
+       gettimeofday(&now, (struct timezone *)0);
+       stamp.tv_sec = now.tv_sec;
+       stamp.tv_nsec = now.tv_usec * 1000;     /* microseconds to nanoseconds */
+       sec = (__int32_t)stamp.tv_sec;
+       nsec = (__int32_t)stamp.tv_nsec;
+
+       if (flags & XFS_ICHGTIME_MOD) {
+               ip->i_d.di_mtime.t_sec = sec;
+               ip->i_d.di_mtime.t_nsec = nsec;
+       }
+       if (flags & XFS_ICHGTIME_ACC) {
+               ip->i_d.di_atime.t_sec = sec;
+               ip->i_d.di_atime.t_nsec = nsec;
+       }
+       if (flags & XFS_ICHGTIME_CHG) {
+               ip->i_d.di_ctime.t_sec = sec;
+               ip->i_d.di_ctime.t_nsec = nsec;
+       }
+}
+
+/*
+ * Allocate an on-disk inode and return its in-core version with
+ * mode, link count and device number filled in.  Ownership comes
+ * from the given cred structure, the project id from prid.
+ *
+ * Returns 0 with *ipp == NULL when the allocator either wants to be
+ * called again (*call_again set) or found no inode (NULLFSINO).
+ * Errors from xfs_dialloc() and xfs_trans_iget() are passed back.
+ *
+ * This was once shared with the kernel, but has diverged to the point
+ * where it is no longer worth the hassle of maintaining common code.
+ */
+int
+libxfs_ialloc(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *pip,
+       mode_t          mode,
+       nlink_t         nlink,
+       dev_t           rdev,
+       cred_t          *cr,
+       xfs_prid_t      prid,
+       int             okalloc,
+       xfs_buf_t       **ialloc_context,
+       boolean_t       *call_again,
+       xfs_inode_t     **ipp)
+{
+       xfs_inode_t     *newip;
+       xfs_ino_t       inum;
+       uint            logflags;
+       int             err;
+
+       /* let the space management code choose the on-disk inode */
+       err = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+                         ialloc_context, call_again, &inum);
+       if (err)
+               return err;
+       if (*call_again || inum == NULLFSINO) {
+               *ipp = NULL;
+               return 0;
+       }
+       ASSERT(*ialloc_context == NULL);
+
+       err = xfs_trans_iget(tp->t_mountp, tp, inum, 0, &newip);
+       if (err)
+               return err;
+       ASSERT(newip != NULL);
+
+       /* identity and ownership */
+       newip->i_d.di_mode = (__uint16_t)mode;
+       newip->i_d.di_uid = cr->cr_uid;
+       newip->i_d.di_gid = cr->cr_gid;
+       newip->i_d.di_projid = prid;
+       newip->i_d.di_onlink = 0;
+       newip->i_d.di_nlink = nlink;
+       ASSERT(newip->i_d.di_nlink == nlink);
+       bzero(&(newip->i_d.di_pad[0]), sizeof(newip->i_d.di_pad));
+
+       /*
+        * Upgrade an old format inode right away when the superblock
+        * version supports the new format, so the conversion happens
+        * only here and not in the flush/logging code as well.  The old
+        * link count, projid and pad fields were already set above.
+        */
+       if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) &&
+           newip->i_d.di_version == XFS_DINODE_VERSION_1) {
+               newip->i_d.di_version = XFS_DINODE_VERSION_2;
+       }
+
+       /* a new inode starts out empty; di_gen is left as read in */
+       newip->i_d.di_size = 0;
+       newip->i_d.di_nextents = 0;
+       ASSERT(newip->i_d.di_nblocks == 0);
+       xfs_ichgtime(newip,
+               XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
+       newip->i_d.di_extsize = 0;
+       newip->i_d.di_dmevmask = 0;
+       newip->i_d.di_dmstate = 0;
+       newip->i_d.di_flags = 0;
+
+       /* data fork setup depends on the file type */
+       logflags = XFS_ILOG_CORE;
+       switch (mode & IFMT) {
+       case IFIFO:
+       case IFCHR:
+       case IFBLK:
+       case IFSOCK:
+               newip->i_d.di_format = XFS_DINODE_FMT_DEV;
+               newip->i_df.if_u2.if_rdev = makedev(major(rdev), minor(rdev));
+               newip->i_df.if_flags = 0;
+               logflags |= XFS_ILOG_DEV;
+               break;
+       case IFREG:
+       case IFDIR:
+       case IFLNK:
+               newip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+               newip->i_df.if_flags = XFS_IFEXTENTS;
+               newip->i_df.if_bytes = newip->i_df.if_real_bytes = 0;
+               newip->i_df.if_u1.if_extents = NULL;
+               break;
+       default:
+               ASSERT(0);
+       }
+
+       /* the attribute fork of a new inode is an empty extent list */
+       newip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+       newip->i_d.di_anextents = 0;
+
+       /* log everything that was just stuffed into the inode */
+       xfs_trans_log_inode(tp, newip, logflags);
+       *ipp = newip;
+       return 0;
+}
+
+/*
+ * Debugging aid: dump the in-core inode to stdout - the data fork
+ * bookkeeping (including every extent record when the extent list is
+ * in core) followed by selected fields of the on-disk inode core.
+ */
+void
+libxfs_iprint(xfs_inode_t *ip)
+{
+       xfs_dinode_core_t       *core;
+       xfs_bmbt_rec_t  *recp;
+       xfs_extnum_t    idx;
+       xfs_extnum_t    count;
+
+       printf("Inode %p\n", ip);
+       printf("    i_dev %x\n", (uint)ip->i_dev);
+       printf("    i_ino %Lx\n", ip->i_ino);
+
+       if (ip->i_df.if_flags & XFS_IFEXTENTS)
+               printf("EXTENTS ");
+       printf("\n");
+       printf("    i_df.if_bytes %d\n", ip->i_df.if_bytes);
+       printf("    i_df.if_u1.if_extents/if_data %p\n", ip->i_df.if_u1.if_extents);
+       if (ip->i_df.if_flags & XFS_IFEXTENTS) {
+               /* one line per in-core extent record */
+               count = ip->i_df.if_bytes / (uint)sizeof(*recp);
+               recp = ip->i_df.if_u1.if_extents;
+               idx = 0;
+               while (idx < count) {
+                       xfs_bmbt_irec_t unpacked;
+
+                       xfs_bmbt_get_all(recp, &unpacked);
+                       printf("\t%d: startoff %Lu, startblock 0x%Lx,"
+                       " blockcount %Lu, state %d\n",
+                               idx, (xfs_dfiloff_t)unpacked.br_startoff,
+                               (xfs_dfsbno_t)unpacked.br_startblock,
+                               (xfs_dfilblks_t)unpacked.br_blockcount,
+                               (int)unpacked.br_state);
+                       idx++;
+                       recp++;
+               }
+       }
+       printf("    i_df.if_broot %p\n", ip->i_df.if_broot);
+       printf("    i_df.if_broot_bytes %x\n", ip->i_df.if_broot_bytes);
+
+       core = &(ip->i_d);
+       printf("\nOn disk portion\n");
+       printf("    di_magic %x\n", core->di_magic);
+       printf("    di_mode %o\n", core->di_mode);
+       printf("    di_version %x\n", (uint)core->di_version);
+       if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               printf("    Inline inode\n");
+       else if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS)
+               printf("    Extents inode\n");
+       else if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+               printf("    B-tree inode\n");
+       else
+               printf("    Other inode\n");
+       printf("   di_nlink %x\n", core->di_nlink);
+       printf("   di_uid %d\n", core->di_uid);
+       printf("   di_gid %d\n", core->di_gid);
+       printf("   di_nextents %d\n", core->di_nextents);
+       printf("   di_size %Ld\n", core->di_size);
+       printf("   di_gen %x\n", core->di_gen);
+       printf("   di_extsize %d\n", core->di_extsize);
+       printf("   di_flags %x\n", core->di_flags);
+       printf("   di_nblocks %Ld\n", core->di_nblocks);
+}
+
+/*
+ * Writes a modified inode's changes out to the inode's on disk home.
+ * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel.
+ *
+ * ip - in-core inode to flush
+ * bp - buffer containing the on-disk inode; its fsprivate pointer must
+ *      already be set (asserted below)
+ *
+ * Returns 0, or EFSCORRUPTED if flushing the data fork reports it.
+ * The buffer itself is not written here; only its contents are updated.
+ */
+int
+libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp)
+{
+       xfs_inode_log_item_t    *iip;
+       xfs_dinode_t            *dip;
+       xfs_mount_t             *mp;
+
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+       /* a btree format data fork must hold more extents than if_ext_max */
+       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+               ip->i_d.di_nextents > ip->i_df.if_ext_max);
+
+       iip = ip->i_itemp;
+       mp = ip->i_mount;
+
+       /* set *dip = inode's place in the buffer */
+       dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset);
+
+#ifdef DEBUG
+       /* sanity checks on the in-core inode before it is copied out */
+       ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC);
+       if ((ip->i_d.di_mode & IFMT) == IFREG) {
+               ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
+                       (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) );
+       }
+       else if ((ip->i_d.di_mode & IFMT) == IFDIR) {
+               ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) ||
+                       (ip->i_d.di_format == XFS_DINODE_FMT_BTREE)   ||
+                       (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) );
+       }
+       ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks);
+       ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize);
+#endif
+
+       /*
+        * Copy the dirty parts of the inode into the on-disk
+        * inode.  We always copy out the core of the inode,
+        * because if the inode is dirty at all the core must
+        * be.
+        */
+       xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1,
+                               ARCH_CONVERT);
+       /*
+        * If this is really an old format inode and the superblock version
+        * has not been updated to support only new format inodes, then
+        * convert back to the old inode format.  If the superblock version
+        * has been updated, then make the conversion permanent.
+        */
+       ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
+               XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+       if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+               if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+                       /*
+                        * Convert it back.
+                        */
+                       ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
+                       /* the old format keeps the link count in di_onlink */
+                       INT_SET(dip->di_core.di_onlink, ARCH_CONVERT,
+                               ip->i_d.di_nlink);
+               } else {
+                       /*
+                        * The superblock version has already been bumped,
+                        * so just make the conversion to the new inode
+                        * format permanent.
+                        */
+                       ip->i_d.di_version = XFS_DINODE_VERSION_2;
+                       INT_SET(dip->di_core.di_version, ARCH_CONVERT,
+                               XFS_DINODE_VERSION_2);
+                       /* clear di_onlink and the pad, in core and on disk */
+                       ip->i_d.di_onlink = 0;
+                       INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT);
+                       bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad));
+                       bzero(&(dip->di_core.di_pad[0]),
+                               sizeof(dip->di_core.di_pad));
+                       ASSERT(ip->i_d.di_projid == 0);
+               }
+       }
+
+       /* flush the data fork, then the attribute fork if XFS_IFORK_Q says so */
+       if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED)
+               return EFSCORRUPTED;
+       if (XFS_IFORK_Q(ip)) {
+               /* The only error from xfs_iflush_fork is on the data fork. */
+               xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
+       }
+
+       return 0;
+}
+
+/*
+ * Advance *bnop to the next mapped (non-hole) block of a fork.
+ *
+ * On return *bnop is the first block after the incoming value that
+ * lies inside an extent, or NULLFILEOFF when there is none.  A local
+ * format fork always yields NULLFILEOFF.  Returns EIO for a fork that
+ * is not in btree, extents or local format, or the error from reading
+ * the extent list in.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+libxfs_bmap_next_offset(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_fileoff_t   *bnop,                  /* current block */
+       int             whichfork)              /* data or attr fork */
+{
+       xfs_bmbt_irec_t cur;                    /* extent found by search */
+       xfs_bmbt_irec_t before;                 /* extent preceding it */
+       xfs_extnum_t    last_index;             /* last extent used */
+       xfs_fileoff_t   want;                   /* block being looked up */
+       xfs_ifork_t     *fork;                  /* inode fork pointer */
+       int             at_eof;                 /* hit end of file */
+       int             err;                    /* error return value */
+
+       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
+               *bnop = NULLFILEOFF;
+               return 0;
+       case XFS_DINODE_FMT_BTREE:
+       case XFS_DINODE_FMT_EXTENTS:
+               break;
+       default:
+               return XFS_ERROR(EIO);
+       }
+
+       /* make sure the extent list is in core before searching it */
+       fork = XFS_IFORK_PTR(ip, whichfork);
+       if (!(fork->if_flags & XFS_IFEXTENTS)) {
+               err = xfs_iread_extents(tp, ip, whichfork);
+               if (err)
+                       return err;
+       }
+
+       want = *bnop + 1;
+       xfs_bmap_search_extents(ip, want, whichfork, &at_eof, &last_index,
+                               &cur, &before);
+       if (at_eof)
+               *bnop = NULLFILEOFF;
+       else if (cur.br_startoff < want)
+               *bnop = want;           /* inside the extent found */
+       else
+               *bnop = cur.br_startoff; /* skip ahead to the extent's start */
+       return 0;
+}
+
+/*
+ * Variant of xfs_dir_removename for entries whose (name, hashvalue)
+ * pair may be inconsistent: the caller supplies the hash value to use
+ * rather than having it derived from the name.
+ *
+ * Returns EINVAL when namelen is MAXNAMELEN or more, otherwise the
+ * result of the format-specific remove routine.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir_bogus_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+               xfs_fsblock_t *firstblock, xfs_bmap_free_t *flist,
+               xfs_extlen_t total, xfs_dahash_t hashval, int namelen)
+{
+       xfs_da_args_t req;
+       int nentries, namebytes, sfsize, rc;
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN)
+               return EINVAL;
+
+       /* describe the request */
+       req.trans = trans;
+       req.dp = dp;
+       req.whichfork = XFS_DATA_FORK;
+       req.name = name;
+       req.namelen = namelen;
+       req.hashval = hashval;
+       req.inumber = 0;
+       req.firstblock = firstblock;
+       req.flist = flist;
+       req.total = total;
+       req.justcheck = req.addname = 0;
+       req.oknoent = 1;
+
+       /* pick the work routine that matches the directory's current form */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               return(xfs_dir_shortform_removename(&req));
+
+       if (!xfs_bmap_one_block(dp, XFS_DATA_FORK))
+               return(xfs_dir_node_removename(&req));
+
+       rc = xfs_dir_leaf_removename(&req, &nentries, &namebytes);
+       if (rc != 0)
+               return(rc);
+
+       /* go back to shortform if what remains fits in the inode */
+       sfsize = XFS_DIR_SF_ALLFIT(nentries, namebytes);
+       if (sfsize <= XFS_IFORK_DSIZE(dp))
+               rc = xfs_dir_leaf_to_shortform(&req);
+       return(rc);
+}
+
+/*
+ * Version 2 directory variant of the "bogus" remove: removes an entry
+ * whose (name, hashvalue) pair may be inconsistent, using the hash
+ * value supplied by the caller.
+ *
+ * Returns EINVAL when namelen is MAXNAMELEN or more, an error from the
+ * block/leaf format probes, or the result of the format-specific
+ * remove routine.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir2_bogus_removename(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       char            *name,          /* name of entry to remove */
+       xfs_fsblock_t   *first,         /* bmap's firstblock */
+       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       xfs_extlen_t    total,          /* bmap's total block count */
+       xfs_dahash_t    hash,           /* name's real hash value */
+       int             namelen)        /* entry's name length */
+{
+       xfs_da_args_t   req;            /* operation arguments */
+       int             err;            /* probe result */
+       int             yes;            /* answer from a format probe */
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN)
+               return EINVAL;
+
+       /* describe the request */
+       req.trans = tp;
+       req.dp = dp;
+       req.whichfork = XFS_DATA_FORK;
+       req.name = name;
+       req.namelen = namelen;
+       req.hashval = hash;
+       req.inumber = 0;
+       req.firstblock = first;
+       req.flist = flist;
+       req.total = total;
+       req.justcheck = req.addname = 0;
+       req.oknoent = 1;
+
+       /* shortform directories need no probing */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               return xfs_dir2_sf_removename(&req);
+
+       /* single block form? */
+       err = xfs_dir2_isblock(tp, dp, &yes);
+       if (err)
+               return err;
+       if (yes)
+               return xfs_dir2_block_removename(&req);
+
+       /* leaf form?  anything else is node form */
+       err = xfs_dir2_isleaf(tp, dp, &yes);
+       if (err)
+               return err;
+       if (yes)
+               return xfs_dir2_leaf_removename(&req);
+
+       return xfs_dir2_node_removename(&req);
+}
+
+/*
+ * Utility routine commonly used to apply a delta to a field in the
+ * in-core superblock.
+ * Switch on the field indicated and apply the delta to that field.
+ *
+ * Only XFS_SBS_FDBLOCKS is supported here.  The free data block count
+ * is not allowed to dip below zero, so if the delta would do this it
+ * is not applied and ENOSPC is returned.  Any other field trips
+ * ASSERT(0); if assertions are compiled out the request is rejected
+ * with EINVAL rather than being reported as a success.
+ *
+ * The rsvd argument is not used by this implementation.
+ *
+ * Returns 0 when the delta was applied.
+ *
+ * Originally derived from xfs_mod_incore_sb().
+ */
+int
+libxfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
+{
+       long long       lcounter;       /* long counter for 64 bit fields */
+
+       switch (field) {
+       case XFS_SBS_FDBLOCKS:
+               /* do the arithmetic signed so an underflow is detectable */
+               lcounter = (long long)mp->m_sb.sb_fdblocks;
+               lcounter += delta;
+               if (lcounter < 0)
+                       return (XFS_ERROR(ENOSPC));
+               mp->m_sb.sb_fdblocks = lcounter;
+               break;
+       default:
+               ASSERT(0);
+               /* nothing was modified: do not claim success */
+               return (XFS_ERROR(EINVAL));
+       }
+       return 0;
+}
+
+/*
+ * Free every extent queued on the bmap free list, using the caller's
+ * transaction (*tp), and remove each item from the list once freed.
+ *
+ * No transaction is committed or replaced here, so *committed is set
+ * to 0 on every path, including error returns.  The firstblock
+ * argument is not used by this implementation.
+ *
+ * Returns 0 on success, or the first error from xfs_free_extent();
+ * in that case the failing item and those after it stay on the list.
+ */
+int
+libxfs_bmap_finish(
+       xfs_trans_t     **tp,
+       xfs_bmap_free_t *flist,
+       xfs_fsblock_t   firstblock,
+       int             *committed)
+{
+       xfs_bmap_free_item_t    *free;  /* free extent list item */
+       xfs_bmap_free_item_t    *next;  /* next item on free list */
+       int                     error;
+
+       /* set up front so callers never read an indeterminate value */
+       *committed = 0;
+       if (flist->xbf_count == 0)
+               return 0;
+
+       for (free = flist->xbf_first; free != NULL; free = next) {
+               /* fetch the link first: the item is deleted below */
+               next = free->xbfi_next;
+               error = xfs_free_extent(*tp, free->xbfi_startblock,
+                               free->xbfi_blockcount);
+               if (error)
+                       return error;
+               xfs_bmap_del_free(flist, NULL, free);
+       }
+       return 0;
+}
+
+/*
+ * Allocate disk space for the byte range [offset, offset + len) of the
+ * given file.  The range is converted to filesystem blocks and handed
+ * to xfs_bmapi() one transaction at a time until every block has been
+ * mapped or a step fails.
+ *
+ * A nonzero alloc_type adds XFS_BMAPI_PREALLOC to the xfs_bmapi()
+ * flags; attr_flags is not used.
+ *
+ * Returns EINVAL if len is not positive, ENOSPC if xfs_bmapi() reports
+ * no mapping, the error of the failing step otherwise, or 0 on success.
+ *
+ * NOTE(review): the error returns inside the loop leave the current
+ * transaction neither committed nor cancelled - confirm whether it
+ * needs to be released.
+ *
+ * Originally derived from xfs_alloc_file_space().
+ */
+int
+libxfs_alloc_file_space(
+       xfs_inode_t     *ip,
+       xfs_off_t       offset,
+       xfs_off_t       len,
+       int             alloc_type,
+       int             attr_flags)
+{
+       xfs_mount_t     *mp;
+       xfs_trans_t     *tp;
+       xfs_bmap_free_t free_list;
+       xfs_bmbt_irec_t imap;           /* the one mapping asked of xfs_bmapi */
+       xfs_fsblock_t   firstfsb;
+       xfs_fileoff_t   next_fsb;       /* first file block still unallocated */
+       xfs_filblks_t   left_fsb;       /* blocks still to allocate */
+       xfs_filblks_t   got_fsb;        /* blocks mapped by the last call */
+       uint            resblks;
+       int             bmapi_flags;
+       int             nimaps;
+       int             committed;
+       int             error;
+
+       if (len <= 0)
+               return EINVAL;
+
+       mp = ip->i_mount;
+       nimaps = 1;
+       bmapi_flags = XFS_BMAPI_WRITE;
+       if (alloc_type)
+               bmapi_flags |= XFS_BMAPI_PREALLOC;
+       next_fsb = XFS_B_TO_FSBT(mp, offset);
+       left_fsb = XFS_B_TO_FSB(mp, len);
+
+       /* allocate file space until done or until there is an error */
+       while (left_fsb) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+               resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, left_fsb);
+               error = xfs_trans_reserve(tp, resblks, 0, 0, 0, 0);
+               if (error)
+                       return error;
+               xfs_trans_ijoin(tp, ip, 0);
+               xfs_trans_ihold(tp, ip);
+
+               XFS_BMAP_INIT(&free_list, &firstfsb);
+               error = xfs_bmapi(tp, ip, next_fsb, left_fsb, bmapi_flags,
+                               &firstfsb, 0, &imap, &nimaps, &free_list);
+               if (error)
+                       return error;
+
+               /* complete the transaction */
+               error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+               if (error)
+                       return error;
+
+               error = xfs_trans_commit(tp, 0, NULL);
+               if (error)
+                       return error;
+
+               /* no mapping came back: nothing was allocated this round */
+               if (nimaps == 0)
+                       return ENOSPC;
+
+               got_fsb = imap.br_blockcount;
+               next_fsb += got_fsb;
+               left_fsb -= got_fsb;
+       }
+       return 0;
+}
+
+/*
+ * Return log2(i) rounded up: the smallest exponent e for which
+ * (1 << e) >= i.  Both 0 and 1 yield 0.  If i is larger than every
+ * power of two representable in an unsigned int, the bit width of
+ * unsigned int (NBBY * sizeof(i)) is returned.
+ */
+unsigned int
+libxfs_log2_roundup(unsigned int i)
+{
+       unsigned int    rval;
+
+       for (rval = 0; rval < NBBY * sizeof(i); rval++) {
+               /*
+                * Shift an unsigned 1: shifting a signed 1 into the
+                * sign bit is undefined, and the comparison with i
+                * would mix signed and unsigned operands.
+                */
+               if ((1U << rval) >= i)
+                       break;
+       }
+       return rval;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ * The magic number is deliberately not checked here; the caller
+ * (xfs_repair) does that itself.
+ *
+ * This is a thin wrapper: it forwards its arguments to
+ * libxfs_da_do_buf() with a caller code of 2 and this function's
+ * return address, and returns whatever that call returns.  mappedbno
+ * is passed by address, so any update is made to the local copy only.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+int
+libxfs_da_read_bufr(
+       xfs_trans_t     *trans,
+       xfs_inode_t     *dp,
+       xfs_dablk_t     bno,
+       xfs_daddr_t             mappedbno,
+       xfs_dabuf_t     **bpp,
+       int             whichfork)
+{
+       inst_t          *ra = (inst_t *)__return_address;
+
+       return libxfs_da_do_buf(trans, dp, bno, &mappedbno, bpp,
+                               whichfork, 2, ra);
+}
+
+/*
+ * Hold dabuf at transaction commit: xfs_trans_bhold() is applied, in
+ * order, to each of the nbuf buffers that make up the dabuf.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bhold(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+       int     idx = 0;
+
+       while (idx < dabuf->nbuf) {
+               xfs_trans_bhold(tp, dabuf->bps[idx]);
+               idx++;
+       }
+}
+
+/*
+ * Join dabuf to transaction: xfs_trans_bjoin() is applied, in order,
+ * to each of the nbuf buffers that make up the dabuf.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bjoin(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+       int     idx = 0;
+
+       while (idx < dabuf->nbuf) {
+               xfs_trans_bjoin(tp, dabuf->bps[idx]);
+               idx++;
+       }
+}
diff --git a/libxfs/xfs.h b/libxfs/xfs.h
new file mode 100644 (file)
index 0000000..d702a38
--- /dev/null
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This header is effectively a "namespace multiplexor" for the
+ * user level XFS code.  It provides all of the necessary stuff
+ * such that we can build some parts of the XFS kernel code in
+ * user space in a controlled fashion, and translates the names
+ * used in the kernel into the names which libxfs is going to
+ * make available to user tools.
+ *
+ * It should only ever be #include'd by XFS "kernel" code being
+ * compiled in user space.
+ * 
+ * Our goals here are to...
+ *      o  "share" large amounts of complex code between user and
+ *         kernel space;
+ *      o  shield the user tools from changes in the bleeding
+ *         edge kernel code, merging source changes when
+ *         convenient and not immediately (no symlinks);
+ *      o  i.e. be able to merge changes to the kernel source back
+ *         into the affected user tools in a controlled fashion;
+ *      o  provide a _minimalist_ life-support system for kernel
+ *         code in user land, not the "everything + the kitchen
+ *         sink" model which libsim had mutated into;
+ *      o  allow the kernel code to be completely free of code
+ *         specifically there to support the user level build.
+ */
+
+#include <libxfs.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <attributes.h>
+
+/*
+ * Map XFS kernel routine names to libxfs.h names.
+ *
+ * Each kernel-style name on the left is rewritten by the preprocessor
+ * to the libxfs_ name on the right, so source compiled with this
+ * header both defines and calls the libxfs_ symbols.
+ */
+
+/* translation, extent record, hashing and sizing helpers */
+#define xfs_xlatesb                    libxfs_xlate_sb
+#define xfs_xlate_dinode_core          libxfs_xlate_dinode_core
+#define xfs_bmbt_get_all                libxfs_bmbt_get_all
+#define xfs_bmbt_get_blockcount         libxfs_bmbt_get_blockcount
+#define xfs_bmbt_get_startoff           libxfs_bmbt_get_startoff
+#define xfs_da_hashname                 libxfs_da_hashname
+#define xfs_da_log2_roundup             libxfs_da_log2_roundup
+#define xfs_highbit32                   libxfs_highbit32
+#define xfs_highbit64                   libxfs_highbit64
+#define xfs_attr_leaf_newentsize        libxfs_attr_leaf_newentsize
+#define xfs_alloc_compute_maxlevels     libxfs_alloc_compute_maxlevels
+#define xfs_bmap_compute_maxlevels      libxfs_bmap_compute_maxlevels
+#define xfs_ialloc_compute_maxlevels    libxfs_ialloc_compute_maxlevels
+
+/* xfs_dir_* and xfs_dir2_* entry points, listed in pairs */
+#define xfs_dir_init                   libxfs_dir_init
+#define xfs_dir2_init                  libxfs_dir2_init
+#define xfs_dir_mount                   libxfs_dir_mount
+#define xfs_dir2_mount                  libxfs_dir2_mount
+#define xfs_dir_createname             libxfs_dir_createname
+#define xfs_dir2_createname            libxfs_dir2_createname
+#define xfs_dir_lookup                 libxfs_dir_lookup
+#define xfs_dir2_lookup                        libxfs_dir2_lookup
+#define xfs_dir_replace                        libxfs_dir_replace
+#define xfs_dir2_replace               libxfs_dir2_replace
+#define xfs_dir_removename             libxfs_dir_removename
+#define xfs_dir2_removename            libxfs_dir2_removename
+#define xfs_dir_bogus_removename       libxfs_dir_bogus_removename
+#define xfs_dir2_bogus_removename      libxfs_dir2_bogus_removename
+
+/* mount, inode, block map, extent free and superblock routines */
+#define xfs_mount_common                libxfs_mount_common
+#define xfs_rtmount_init                libxfs_rtmount_init
+#define xfs_alloc_fix_freelist         libxfs_alloc_fix_freelist
+#define xfs_iread                      libxfs_iread
+#define xfs_ialloc                     libxfs_ialloc
+#define xfs_idata_realloc              libxfs_idata_realloc
+#define xfs_itobp                      libxfs_itobp
+#define xfs_ichgtime                   libxfs_ichgtime
+#define xfs_bmapi                      libxfs_bmapi
+#define xfs_bmap_finish                        libxfs_bmap_finish
+#define xfs_bmap_del_free              libxfs_bmap_del_free
+#define xfs_bunmapi                    libxfs_bunmapi
+#define xfs_free_extent                        libxfs_free_extent
+#define xfs_rtfree_extent              libxfs_rtfree_extent
+#define xfs_mod_sb                     libxfs_mod_sb
+#define xfs_mod_incore_sb              libxfs_mod_incore_sb
+
+/* xfs_trans_* routines */
+#define xfs_trans_init                  libxfs_trans_init
+#define xfs_trans_dup                  libxfs_trans_dup
+#define xfs_trans_iget                 libxfs_trans_iget
+#define xfs_trans_ijoin                        libxfs_trans_ijoin
+#define xfs_trans_ihold                        libxfs_trans_ihold
+#define xfs_trans_bjoin                        libxfs_trans_bjoin
+#define xfs_trans_bhold                        libxfs_trans_bhold
+#define xfs_trans_alloc                        libxfs_trans_alloc
+#define xfs_trans_commit               libxfs_trans_commit
+#define xfs_trans_mod_sb               libxfs_trans_mod_sb
+#define xfs_trans_reserve              libxfs_trans_reserve
+#define xfs_trans_get_buf              libxfs_trans_get_buf
+#define xfs_trans_log_buf              libxfs_trans_log_buf
+#define xfs_trans_read_buf             libxfs_trans_read_buf
+#define xfs_trans_log_inode            libxfs_trans_log_inode
+#define xfs_trans_inode_alloc_buf      libxfs_trans_inode_alloc_buf
+#define xfs_trans_brelse               libxfs_trans_brelse
+#define xfs_trans_binval               libxfs_trans_binval
+
+/* xfs_da_* and low-level xfs_dir2_* routines */
+#define xfs_da_shrink_inode            libxfs_da_shrink_inode
+#define xfs_da_grow_inode              libxfs_da_grow_inode
+#define xfs_da_brelse                  libxfs_da_brelse
+#define xfs_da_read_buf                        libxfs_da_read_buf              
+#define xfs_da_get_buf                 libxfs_da_get_buf
+#define xfs_da_log_buf                 libxfs_da_log_buf
+#define xfs_da_do_buf                  libxfs_da_do_buf
+#define xfs_dir2_shrink_inode          libxfs_dir2_shrink_inode
+#define xfs_dir2_grow_inode            libxfs_dir2_grow_inode
+#define xfs_dir2_isleaf                        libxfs_dir2_isleaf
+#define xfs_dir2_isblock               libxfs_dir2_isblock
+#define xfs_dir2_data_use_free         libxfs_dir2_data_use_free
+#define xfs_dir2_data_make_free                libxfs_dir2_data_make_free
+#define xfs_dir2_data_log_entry                libxfs_dir2_data_log_entry
+#define xfs_dir2_data_log_header       libxfs_dir2_data_log_header
+#define xfs_dir2_data_freescan         libxfs_dir2_data_freescan
+#define xfs_dir2_free_log_bests                libxfs_dir2_free_log_bests
+
+
+/*
+ * Infrastructure to support building kernel XFS code in user space
+ */
+
+/*
+ * buffer management
+ *
+ * The buffer flag and state macros below collapse to the constant 0 or
+ * to an empty expression, so code written against them still compiles
+ * but those operations do nothing here.
+ */
+#define XFS_BUF_LOCK                   0
+#define XFS_BUF_MAPPED                 0
+#define XFS_BUF_TRYLOCK                        0
+#define XFS_BUF_ISDONE(bp)             0
+#define XFS_BUF_GETERROR(bp)           0
+#define XFS_BUF_DONE(bp)               ((void) 0)
+#define XFS_BUF_SET_REF(a,b)           ((void) 0)
+#define XFS_BUF_SET_VTYPE(a,b)         ((void) 0)
+#define XFS_BUF_SET_VTYPE_REF(a,b,c)   ((void) 0)
+#define XFS_BUF_SET_BDSTRAT_FUNC(a,b)  ((void) 0)
+#define xfs_baread(a,b,c)              ((void) 0)      /* no readahead */
+#define xfs_buftrace(x,y)              ((void) 0)      /* debug only */
+#define xfs_buf_item_log_debug(bip,a,b)        ((void) 0)      /* debug only */
+#define xfs_validate_extents(e,n,f)    ((void) 0)      /* debug only */
+#define xfs_buf_relse(bp)              libxfs_putbuf(bp)
+/*
+ * Stores the result of libxfs_readbuf() through bpp and always
+ * evaluates to 0 (the comma operator discards the assignment's
+ * value); the x and f arguments are ignored.  A caller can therefore
+ * only detect a problem by inspecting *bpp, never from the "return"
+ * value.
+ */
+#define xfs_read_buf(mp,x,blkno,len,f,bpp)     \
+       ( *(bpp) = libxfs_readbuf( (mp)->m_dev, (blkno), (len), 1), 0 )
+
+
+/*
+ * transaction management
+ *
+ * These transaction hooks are compiled out: most expand to an empty
+ * expression.  xfs_trans_get_block_res() is the constant 1 and
+ * xfs_trans_reserve_blkquota() is the constant 0, whatever their
+ * arguments.
+ */
+#define xfs_trans_set_sync(tp)                 ((void) 0)
+#define xfs_trans_agblocks_delta(tp, d)                ((void) 0)      /* debug only */
+#define xfs_trans_agflist_delta(tp, d)         ((void) 0)      /* debug only */
+#define xfs_trans_agbtree_delta(tp, d)         ((void) 0)      /* debug only */
+#define xfs_trans_mod_dquot_byino(tp,ip,f,d)   ((void) 0)
+#define xfs_trans_get_block_res(tp)            1
+#define xfs_trans_reserve_blkquota(tp,i,n)     0
+#define xfs_trans_unreserve_blkquota(tp,i,n)   ((void) 0)
+#define xfs_trans_unreserve_rtblkquota(tp,i,n) ((void) 0)
+
+
+/*
+ * memory management
+ *
+ * The kmem_* interfaces are redirected to libxfs_* allocator calls.
+ * The flag arguments (f, u) and the size passed to kmem_free() are
+ * dropped.  Note that kmem_zone_alloc() and kmem_zone_zalloc() both
+ * map to libxfs_zone_zalloc().
+ */
+#define kmem_zone_init(a, b)   libxfs_zone_init(a, b)
+#define kmem_zone_alloc(z, f)  libxfs_zone_zalloc(z)
+#define kmem_zone_zalloc(z, f) libxfs_zone_zalloc(z)
+#define kmem_zone_free(z, p)   libxfs_zone_free(z, p)
+#define kmem_realloc(p,sz,u,f) libxfs_realloc(p,sz)
+#define kmem_alloc(size, f)    libxfs_malloc(size)
+#define kmem_free(p, size)     libxfs_free(p)
+
+/*
+ * directory management
+ *
+ * The xfs_dir2_trace_args*() calls and the shortform on-disk
+ * validation call expand to an empty expression; their arguments are
+ * never evaluated.
+ */
+#define xfs_dir2_trace_args(where, args)               ((void) 0)
+#define xfs_dir2_trace_args_b(where, args, bp)         ((void) 0)
+#define xfs_dir2_trace_args_bb(where, args, lbp, dbp)  ((void) 0)
+#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c) ((void) 0)
+#define xfs_dir2_trace_args_db(where, args, db, bp)    ((void) 0)
+#define xfs_dir2_trace_args_i(where, args, i)          ((void) 0)
+#define xfs_dir2_trace_args_s(where, args, s)          ((void) 0)
+#define xfs_dir2_trace_args_sb(where, args, s, bp)     ((void) 0)
+#define xfs_dir_shortform_validate_ondisk(a,b)         ((void) 0)
+
+
+/*
+ * block management
+ *
+ * Extent checking, validation and tracing hooks of the block map and
+ * bmap btree code all expand to an empty expression; their arguments
+ * are never evaluated.
+ */
+#define xfs_bmap_check_extents(ip,w)                   ((void) 0)
+#define xfs_bmap_trace_delete(f,d,ip,i,c,w)            ((void) 0)
+#define xfs_bmap_trace_exlist(f,ip,i,w)                        ((void) 0)
+#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)      ((void) 0)
+#define xfs_bmap_trace_post_update(f,d,ip,i,w)         ((void) 0)
+#define xfs_bmap_trace_pre_update(f,d,ip,i,w)          ((void) 0)
+#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)   ((void) 0)
+#define xfs_bunmap_trace(ip, bno, len, flags, ra)      ((void) 0)
+#define XFS_BMBT_TRACE_ARGBI(c,b,i)                    ((void) 0)
+#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)                 ((void) 0)
+#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)              ((void) 0)
+#define XFS_BMBT_TRACE_ARGI(c,i)                       ((void) 0)
+#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k)                 ((void) 0)
+#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)                 ((void) 0)
+#define XFS_BMBT_TRACE_ARGIK(c,i,k)                    ((void) 0)
+#define XFS_BMBT_TRACE_CURSOR(c,s)                     ((void) 0)
+
+
+/* anything else */
+typedef __uint32_t inst_t;     /* an instruction */
+typedef enum { B_FALSE, B_TRUE } boolean_t;
+typedef struct { dev_t dev; } buftarg_t;
+/* STATIC expands to nothing, so STATIC functions get external linkage */
+#define STATIC
+#define ENOATTR                1009    /* Attribute not found */
+#define EFSCORRUPTED   1010    /* Filesystem is corrupted */
+#define ktrace_t       void
+/* references to the m_ddev_targp member are redirected to m_dev */
+#define m_ddev_targp   m_dev
+/* KERN_WARNING vanishes, leaving just the format string that follows it */
+#define KERN_WARNING
+#define XFS_ERROR(e)   (e)
+/* both message routines print to stderr; xfs_fs_cmn_err ignores a and b */
+#define xfs_fs_cmn_err(a,b,msg,args...)        ( fprintf(stderr, msg, ## args) )
+#define printk(msg,args...)            ( fprintf(stderr, msg, ## args) )
+/* only expr is evaluated; the remaining arguments are discarded */
+#define XFS_TEST_ERROR(expr,a,b,c)     ( expr )
+#define TRACE_FREE(s,a,b,x,f)          ((void) 0)
+#define TRACE_ALLOC(s,a)               ((void) 0)
+#define TRACE_MODAGF(a,b,c)            ((void) 0)
+#define XFS_FORCED_SHUTDOWN(mp)                0
+#define XFS_MOUNT_WSYNC                        0
+#define XFS_MOUNT_NOALIGN              0
+#define XFS_ILOCK_EXCL                 0
+/* mr* lock calls do nothing; ismrlocked() always reports 1 */
+#define mrlock(a,b,c)                  ((void) 0)
+#define mraccunlock(a)                 ((void) 0)
+#define mrunlock(a)                    ((void) 0)
+#define mraccess(a)                    ((void) 0)
+#define ismrlocked(a,b)                        1
+/* note the argument order: ovbcopy is (from, to), memmove is (to, from) */
+#define ovbcopy(from,to,count)         memmove(to,from,count)
+#define __return_address               __builtin_return_address(0)
+#define xfs_btree_reada_bufl(m,fsb,c)  ((void) 0)
+#define xfs_btree_reada_bufs(m,fsb,c,x)        ((void) 0)
+/* replace any earlier definition so the validation always yields 0 */
+#undef  XFS_DIR_SHORTFORM_VALIDATE_ONDISK
+#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip) 0
+
+/* do_mod(a, b): remainder of a divided by b. */
+#define do_mod(a, b)   ((a) % (b))
+/*
+ * do_div(n, base): divide the lvalue n by base in place and evaluate
+ * to the remainder (as an int).  base is converted to unsigned.
+ *
+ * Both arguments are parenthesised so that an expression such as
+ * do_div(n, a + b) divides by (a + b), and n is widened to unsigned
+ * long long so that 64 bit values are not truncated where long is
+ * 32 bits.  n and base are each evaluated more than once, so they
+ * must not have side effects.
+ */
+#define do_div(n,base) ({ \
+       int __res; \
+       __res = ((unsigned long long) (n)) % (unsigned) (base); \
+       (n) = ((unsigned long long) (n)) / (unsigned) (base); \
+       __res; })
+
+#include <asm/page.h>
+#define NBPP   PAGE_SIZE
+
+/*
+ * Increment *a, resetting it to 0 when the incremented value equals b,
+ * and return the value *a held before the increment.
+ * Despite the name, this is a plain read-modify-write; nothing here
+ * makes it atomic.
+ */
+static inline int atomicIncWithWrap(int *a, int b)
+{
+       int previous = *a;
+       int bumped = previous + 1;
+
+       *a = (bumped == b) ? 0 : bumped;
+       return previous;
+}
+
+
+/*
+ * Prototypes needed for a clean build
+ */
+
+/* xfs_alloc.c */
+int  xfs_alloc_get_freelist (xfs_trans_t *, xfs_buf_t *, xfs_agblock_t *);
+void xfs_alloc_log_agf (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_alloc_put_freelist (xfs_trans_t *, xfs_buf_t *, xfs_buf_t *,
+                       xfs_agblock_t);
+int  xfs_alloc_read_agf (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+                       int, xfs_buf_t **);
+int  xfs_alloc_vextent (xfs_alloc_arg_t *);
+int  xfs_alloc_pagf_init (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t, int);
+int  xfs_alloc_ag_vextent_size (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_near (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_exact (xfs_alloc_arg_t *);
+int  xfs_alloc_ag_vextent_small (xfs_alloc_arg_t *, xfs_btree_cur_t *,
+                       xfs_agblock_t *, xfs_extlen_t *, int *);
+
+/* xfs_ialloc.c */
+int  xfs_dialloc (xfs_trans_t *, xfs_ino_t, mode_t, int, xfs_buf_t **,
+                       boolean_t *, xfs_ino_t *);
+void xfs_ialloc_log_agi (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_ialloc_read_agi (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+                       xfs_buf_t **);
+int  xfs_dilocate (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_fsblock_t *,
+                       int *, int *, uint);
+
+/* xfs_rtalloc.c */
+int  xfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t, xfs_extlen_t);
+int  xfs_rtmodify_range (xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t,
+                       xfs_extlen_t, int);
+int  xfs_rtmodify_summary (xfs_mount_t *, xfs_trans_t *, int,
+                       xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *);
+
+/* xfs_btree.c */
+extern xfs_zone_t *xfs_btree_cur_zone;
+void xfs_btree_check_key (xfs_btnum_t, void *, void *);
+void xfs_btree_check_rec (xfs_btnum_t, void *, void *);
+int  xfs_btree_check_lblock (xfs_btree_cur_t *, xfs_btree_lblock_t *,
+                       int, xfs_buf_t *);
+int  xfs_btree_check_sblock (xfs_btree_cur_t *, xfs_btree_sblock_t *,
+                       int, xfs_buf_t *);
+int  xfs_btree_check_sptr (xfs_btree_cur_t *, xfs_agblock_t, int);
+int  xfs_btree_check_lptr (xfs_btree_cur_t *, xfs_dfsbno_t, int);
+void xfs_btree_del_cursor (xfs_btree_cur_t *, int);
+int  xfs_btree_dup_cursor (xfs_btree_cur_t *, xfs_btree_cur_t **);
+int  xfs_btree_firstrec (xfs_btree_cur_t *, int);
+xfs_btree_block_t *xfs_btree_get_block (xfs_btree_cur_t *, int, xfs_buf_t **);
+xfs_buf_t *xfs_btree_get_bufs (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+                       xfs_agblock_t, uint);
+xfs_buf_t *xfs_btree_get_bufl (xfs_mount_t *, xfs_trans_t *tp,
+                       xfs_fsblock_t, uint);
+xfs_btree_cur_t *xfs_btree_init_cursor (xfs_mount_t *, xfs_trans_t *,
+                       xfs_buf_t *, xfs_agnumber_t, xfs_btnum_t,
+                       xfs_inode_t *, int);
+int  xfs_btree_islastblock (xfs_btree_cur_t *, int);
+int  xfs_btree_lastrec (xfs_btree_cur_t *, int);
+void xfs_btree_offsets (__int64_t, const short *, int, int *, int *);
+int  xfs_btree_readahead (xfs_btree_cur_t *, int, int);
+void xfs_btree_setbuf (xfs_btree_cur_t *, int, xfs_buf_t *);
+int  xfs_btree_read_bufs (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+                       xfs_agblock_t, uint, xfs_buf_t **, int);
+int  xfs_btree_read_bufl (xfs_mount_t *, xfs_trans_t *, xfs_fsblock_t,
+                       uint, xfs_buf_t **, int);
+
+/* xfs_inode.c */
+int  xfs_ialloc (xfs_trans_t *, xfs_inode_t *, mode_t, nlink_t, dev_t, cred_t *,
+               xfs_prid_t, int, xfs_buf_t **, boolean_t *, xfs_inode_t **);
+int  xfs_iread_extents (xfs_trans_t *, xfs_inode_t *, int);
+int  xfs_imap (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_imap_t *, uint);
+int  xfs_iextents_copy (xfs_inode_t *, xfs_bmbt_rec_32_t *, int);
+int  xfs_iflush_int (xfs_inode_t *, xfs_buf_t *);
+int  xfs_iflush_fork (xfs_inode_t *, xfs_dinode_t *, xfs_inode_log_item_t *,
+               int, xfs_buf_t *);
+int  xfs_iformat_local (xfs_inode_t *, xfs_dinode_t *, int, int);
+int  xfs_iformat_extents (xfs_inode_t *, xfs_dinode_t *, int);
+int  xfs_iformat_btree (xfs_inode_t *, xfs_dinode_t *, int);
+void xfs_iroot_realloc (xfs_inode_t *, int, int);
+void xfs_idata_realloc (xfs_inode_t *, int, int);
+void xfs_iext_realloc (xfs_inode_t *, int, int);
+void xfs_idestroy_fork (xfs_inode_t *, int);
+uint xfs_iroundup (uint);
+
+/* xfs_bmap.c */
+xfs_bmbt_rec_t *xfs_bmap_search_extents (xfs_inode_t *ip,
+                       xfs_fileoff_t, int, int *, xfs_extnum_t *,
+                       xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
+int  xfs_bmap_read_extents (xfs_trans_t *, xfs_inode_t *, int);
+void xfs_bmap_add_free (xfs_fsblock_t, xfs_filblks_t, xfs_bmap_free_t *,
+                       xfs_mount_t *);
+int  xfs_bmap_first_unused (xfs_trans_t *, xfs_inode_t *, xfs_extlen_t,
+                       xfs_fileoff_t *, int);
+int  xfs_bmap_last_offset (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int);
+int  xfs_bmap_last_before (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int);
+int  xfs_bmap_one_block (xfs_inode_t *, int);
+int  xfs_bmapi_single (xfs_trans_t *, xfs_inode_t *, int, xfs_fsblock_t *,
+                       xfs_fileoff_t);
+int  xfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+                       xfs_filblks_t, int, xfs_fsblock_t *, xfs_extlen_t,
+                       xfs_bmbt_irec_t *, int *, xfs_bmap_free_t *);
+int  xfs_bunmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t,
+                       xfs_filblks_t, int, xfs_extnum_t, xfs_fsblock_t *,
+                       xfs_bmap_free_t *, int *);
+int  xfs_bmap_add_extent_hole_delay (xfs_inode_t *ip, xfs_extnum_t,
+                       xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int);
+int  xfs_bmap_add_extent_hole_real (xfs_inode_t *, xfs_extnum_t,
+                       xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int);
+int  xfs_bmap_add_extent_unwritten_real (xfs_inode_t *, xfs_extnum_t,
+                       xfs_btree_cur_t **, xfs_bmbt_irec_t *, int *);
+int  xfs_bmap_add_extent_delay_real (xfs_inode_t *, xfs_extnum_t,
+                       xfs_btree_cur_t **, xfs_bmbt_irec_t *, xfs_filblks_t *,
+                       xfs_fsblock_t *, xfs_bmap_free_t *, int *, int);
+int  xfs_bmap_extents_to_btree (xfs_trans_t *, xfs_inode_t *, xfs_fsblock_t *,
+                       xfs_bmap_free_t *, xfs_btree_cur_t **, int, int *, int);
+void xfs_bmap_delete_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t, int);
+xfs_filblks_t xfs_bmap_worst_indlen (xfs_inode_t *, xfs_filblks_t);
+int  xfs_bmap_isaeof (xfs_inode_t *, xfs_fileoff_t, int, int *);
+void xfs_bmap_insert_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
+                       xfs_bmbt_irec_t *, int);
+
+/* xfs_bmap_btree.c */
+int  xfs_check_nostate_extents (xfs_bmbt_rec_t *, xfs_extnum_t);
+void xfs_bmbt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_bmbt_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+int  xfs_bmbt_killroot (xfs_btree_cur_t *, int);
+int  xfs_bmbt_updkey (xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
+int  xfs_bmbt_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_bmbt_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_bmbt_split (xfs_btree_cur_t *, int, xfs_fsblock_t *,
+                       xfs_bmbt_key_t *, xfs_btree_cur_t **, int *);
+
+/* xfs_ialloc_btree.c */
+int  xfs_inobt_newroot (xfs_btree_cur_t *, int *);
+int  xfs_inobt_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_inobt_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_inobt_split (xfs_btree_cur_t *, int, xfs_agblock_t *,
+                       xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
+void xfs_inobt_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_inobt_log_block (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_inobt_updkey (xfs_btree_cur_t *, xfs_inobt_key_t *, int);
+
+/* xfs_alloc_btree.c */
+void xfs_alloc_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int);
+void xfs_alloc_log_block (xfs_trans_t *, xfs_buf_t *, int);
+int  xfs_alloc_updkey (xfs_btree_cur_t *, xfs_alloc_key_t *, int);
+int  xfs_alloc_lshift (xfs_btree_cur_t *, int, int *);
+int  xfs_alloc_rshift (xfs_btree_cur_t *, int, int *);
+int  xfs_alloc_newroot (xfs_btree_cur_t *, int *);
+int  xfs_alloc_split (xfs_btree_cur_t *, int, xfs_agblock_t *,
+                       xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
+
+/* xfs_da_btree.c */
+xfs_dabuf_t *xfs_da_buf_make (int, xfs_buf_t **, inst_t *);
+int  xfs_da_root_join (xfs_da_state_t *, xfs_da_state_blk_t *);
+int  xfs_da_root_split (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+void xfs_da_node_add (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+int  xfs_da_node_split (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *, xfs_da_state_blk_t *, int, int *);
+void xfs_da_node_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+void xfs_da_node_remove (xfs_da_state_t *, xfs_da_state_blk_t *);
+void xfs_da_node_unbalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+int  xfs_da_node_order (xfs_dabuf_t *, xfs_dabuf_t *);
+int  xfs_da_node_toosmall (xfs_da_state_t *, int *);
+uint xfs_da_node_lasthash (xfs_dabuf_t *, int *);
+int  xfs_da_do_buf (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, xfs_daddr_t *,
+                       xfs_dabuf_t **, int, int, inst_t *);
+
+/* xfs_dir.c */
+int  xfs_dir_node_addname (xfs_da_args_t *);
+int  xfs_dir_leaf_lookup (xfs_da_args_t *);
+int  xfs_dir_node_lookup (xfs_da_args_t *);
+int  xfs_dir_leaf_replace (xfs_da_args_t *);
+int  xfs_dir_node_replace (xfs_da_args_t *);
+int  xfs_dir_node_removename (xfs_da_args_t *);
+int  xfs_dir_leaf_removename (xfs_da_args_t *, int *, int *);
+
+/* xfs_dir_leaf.c */
+void xfs_dir_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+void xfs_dir_leaf_add_work (xfs_dabuf_t *, xfs_da_args_t *, int, int);
+int  xfs_dir_leaf_compact (xfs_trans_t *, xfs_dabuf_t *, int, int);
+int  xfs_dir_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *, int *, int *);
+void xfs_dir_leaf_moveents (xfs_dir_leafblock_t *, int,
+                       xfs_dir_leafblock_t *, int, int, xfs_mount_t *);
+
+/* xfs_dir2_leaf.c */
+void xfs_dir2_leaf_check (xfs_inode_t *, xfs_dabuf_t *);
+int  xfs_dir2_leaf_lookup_int (xfs_da_args_t *, xfs_dabuf_t **,
+                       int *, xfs_dabuf_t **);
+
+/* xfs_dir2_block.c */
+void xfs_dir2_block_log_tail (xfs_trans_t *, xfs_dabuf_t *);
+void xfs_dir2_block_log_leaf (xfs_trans_t *, xfs_dabuf_t *, int, int);
+int  xfs_dir2_block_lookup_int (xfs_da_args_t *, xfs_dabuf_t **, int *);
+
+/* xfs_dir2_node.c */
+void xfs_dir2_leafn_check (xfs_inode_t *, xfs_dabuf_t *);
+int  xfs_dir2_leafn_remove (xfs_da_args_t *, xfs_dabuf_t *, int,
+                       xfs_da_state_blk_t *, int *);
+int  xfs_dir2_node_addname_int (xfs_da_args_t *, xfs_da_state_blk_t *);
+
+/* xfs_dir2_sf.c */
+void xfs_dir2_sf_check (xfs_da_args_t *);
+int  xfs_dir2_sf_addname_pick (xfs_da_args_t *, int,
+                       xfs_dir2_sf_entry_t **, xfs_dir2_data_aoff_t *);
+void xfs_dir2_sf_addname_easy (xfs_da_args_t *, xfs_dir2_sf_entry_t *,
+                       xfs_dir2_data_aoff_t, int);
+void xfs_dir2_sf_addname_hard (xfs_da_args_t *, int, int);
+void xfs_dir2_sf_toino8 (xfs_da_args_t *);
+void xfs_dir2_sf_toino4 (xfs_da_args_t *);
+
+/* xfs_attr_leaf.c */
+void xfs_attr_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *);
+int  xfs_attr_leaf_add_work (xfs_dabuf_t *, xfs_da_args_t *, int);
+void xfs_attr_leaf_compact (xfs_trans_t *, xfs_dabuf_t *);
+void xfs_attr_leaf_moveents (xfs_attr_leafblock_t *, int,
+                       xfs_attr_leafblock_t *, int, int, xfs_mount_t *);
+int  xfs_attr_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *,
+                       xfs_da_state_blk_t *, int *, int *);
+
+/* xfs_trans_item.c */
+xfs_log_item_desc_t *xfs_trans_add_item (xfs_trans_t *, xfs_log_item_t *);
+xfs_log_item_desc_t *xfs_trans_find_item (xfs_trans_t *, xfs_log_item_t *);
+void xfs_trans_free_item (xfs_trans_t *, xfs_log_item_desc_t *);
+void xfs_trans_free_items (xfs_trans_t *, int);
+
+/* xfs_trans_buf.c */
+xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, buftarg_t *,
+                       xfs_daddr_t, int);
+xfs_buf_t *xfs_trans_buf_item_match_all (xfs_trans_t *, buftarg_t *,
+                       xfs_daddr_t, int);
+
+/* xfs_inode_item.c */
+void xfs_inode_item_init (xfs_inode_t *, xfs_mount_t *);
+
+/* xfs_buf_item.c */
+void xfs_buf_item_init (xfs_buf_t *, xfs_mount_t *);
+void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint);
+
+/* local source files */
+int  xfs_mod_incore_sb (xfs_mount_t *, xfs_sb_field_t, int, int);
+void xfs_trans_mod_sb (xfs_trans_t *, uint, long);
+int  xfs_trans_unlock_chunk (xfs_log_item_chunk_t *, int, int, xfs_lsn_t);
+
+
+/*
+ * When DEBUG is not defined, the consistency-check routines below are
+ * replaced by empty expressions, so calls to them cost nothing and
+ * their arguments are never evaluated.  xfs_dir2_data_check is
+ * #undef'd first so that any earlier definition is replaced.
+ */
+#ifndef DEBUG
+#define xfs_inobp_check(mp,bp)                         ((void) 0)
+#define xfs_btree_check_key(a,b,c)                     ((void) 0)
+#define xfs_btree_check_rec(a,b,c)                     ((void) 0)
+#define xfs_btree_check_block(a,b,c,d)                 ((void) 0)
+#define xfs_dir2_sf_check(args)                                ((void) 0)
+#define xfs_dir2_leaf_check(dp,bp)                     ((void) 0)
+#define xfs_dir2_leafn_check(dp,bp)                    ((void) 0)
+#undef xfs_dir2_data_check
+#define xfs_dir2_data_check(dp,bp)                     ((void) 0)
+#endif
diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c
new file mode 100644 (file)
index 0000000..9792416
--- /dev/null
@@ -0,0 +1,2355 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#include <xfs.h>
+
+/* Absolute difference of a and b; each argument is evaluated more than once. */
+#define XFS_ABSDIFF(a,b)       (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
+/*
+ * Flag bits for xfs_alloc_fixup_trees(): the by-block / by-size cursor
+ * already points at the free extent record, so no lookup is done for it.
+ */
+#define        XFSA_FIXUP_BNO_OK       1
+#define        XFSA_FIXUP_CNT_OK       2
+
+/*
+ * Trim a found free extent so that it begins on an alignment boundary.
+ * The aligned start block and the length that remains are stored through
+ * resbno and reslen; the return value says whether that length still
+ * covers minlen.  Extents already shorter than minlen, and requests with
+ * alignment <= 1, are passed through unchanged.
+ */
+STATIC int                             /* success (>= minlen) */
+xfs_alloc_compute_aligned(
+       xfs_agblock_t   foundbno,       /* starting block in found extent */
+       xfs_extlen_t    foundlen,       /* length in found extent */
+       xfs_extlen_t    alignment,      /* alignment for allocation */
+       xfs_extlen_t    minlen,         /* minimum length for allocation */
+       xfs_agblock_t   *resbno,        /* result block number */
+       xfs_extlen_t    *reslen)        /* result length */
+{
+       xfs_agblock_t   start = foundbno;       /* (aligned) start block */
+       xfs_extlen_t    avail = foundlen;       /* length left from start */
+
+       if (alignment > 1 && foundlen >= minlen) {
+               xfs_extlen_t    skip;   /* blocks given up to alignment */
+
+               start = roundup(foundbno, alignment);
+               skip = start - foundbno;
+               if (skip >= foundlen)
+                       avail = 0;
+               else
+                       avail = foundlen - skip;
+       }
+       *resbno = start;
+       *reslen = avail;
+       return avail >= minlen;
+}
+
+/*
+ * Compute best start block and diff for "near" allocations.
+ * freelen >= wantlen already checked by caller.
+ * The chosen start block is stored through newbnop, or NULLAGBLOCK when
+ * no suitable start exists in the free extent.  The return value is the
+ * absolute distance between that start and wantbno, and is 0 when the
+ * result is NULLAGBLOCK, so *newbnop has to be tested as well.
+ */
+STATIC xfs_extlen_t                    /* difference value (absolute) */
+xfs_alloc_compute_diff(
+       xfs_agblock_t   wantbno,        /* target starting block */
+       xfs_extlen_t    wantlen,        /* target length */
+       xfs_extlen_t    alignment,      /* target alignment */
+       xfs_agblock_t   freebno,        /* freespace's starting block */
+       xfs_extlen_t    freelen,        /* freespace's length */
+       xfs_agblock_t   *newbnop)       /* result: best start block from free */
+{
+       xfs_agblock_t   freeend;        /* end of freespace extent */
+       xfs_agblock_t   newbno1;        /* return block number */
+       xfs_agblock_t   newbno2;        /* other new block number */
+       xfs_extlen_t    newlen1;        /* length with newbno1 */
+       xfs_extlen_t    newlen2;        /* length with newbno2 */
+       xfs_agblock_t   wantend;        /* end of target extent */
+
+       ASSERT(freelen >= wantlen);
+       freeend = freebno + freelen;
+       wantend = wantbno + wantlen;
+       if (freebno >= wantbno) {
+               /*
+                * Free extent starts at or after the target: use its
+                * first aligned block, if that is still inside it.
+                */
+               if ((newbno1 = roundup(freebno, alignment)) >= freeend)
+                       newbno1 = NULLAGBLOCK;
+       } else if (freeend >= wantend && alignment > 1) {
+               /*
+                * Free extent starts before the target and reaches its
+                * end.  Candidates are the aligned block at or after
+                * wantbno and the aligned block one alignment earlier;
+                * each is dropped if it falls outside the free extent.
+                */
+               newbno1 = roundup(wantbno, alignment);
+               newbno2 = newbno1 - alignment;
+               if (newbno1 >= freeend)
+                       newbno1 = NULLAGBLOCK;
+               else
+                       newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
+               if (newbno2 < freebno)
+                       newbno2 = NULLAGBLOCK;
+               else
+                       newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
+               /*
+                * With two candidates, take the one giving the longer
+                * length; on equal length, the one closer to wantbno.
+                * newlen1/newlen2 are only read when their block is valid.
+                */
+               if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
+                       if (newlen1 < newlen2 ||
+                           (newlen1 == newlen2 &&
+                            XFS_ABSDIFF(newbno1, wantbno) >
+                            XFS_ABSDIFF(newbno2, wantbno)))
+                               newbno1 = newbno2;
+               } else if (newbno2 != NULLAGBLOCK)
+                       newbno1 = newbno2;
+       } else if (freeend >= wantend) {
+               /* Target lies inside the free extent, no alignment needed. */
+               newbno1 = wantbno;
+       } else if (alignment > 1) {
+               /*
+                * Free extent ends before the target does.  Start from
+                * the block that would make the allocation end at
+                * freeend, rounded up; step back one alignment unit if
+                * rounding moved it and the earlier block is still in
+                * the free extent, otherwise give up if it is past the end.
+                */
+               newbno1 = roundup(freeend - wantlen, alignment);
+               if (newbno1 > freeend - wantlen &&
+                   newbno1 - alignment >= freebno)
+                       newbno1 -= alignment;
+               else if (newbno1 >= freeend)
+                       newbno1 = NULLAGBLOCK;
+       } else
+               /* No alignment: allocation ends exactly at freeend. */
+               newbno1 = freeend - wantlen;
+       *newbnop = newbno1;
+       return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+}
+
+/*
+ * Fix up the length, based on mod and prod.
+ * len should be k * prod + mod for some k.
+ * If len is too small it is returned unchanged.
+ * If len hits maxlen it is left alone.
+ * The length is only ever rounded down; if rounding down would take it
+ * below minlen, args->len is left unchanged.
+ */
+STATIC void
+xfs_alloc_fix_len(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_extlen_t    k;              /* current remainder, len % prod */
+       xfs_extlen_t    rlen;           /* candidate rounded length */
+
+       ASSERT(args->mod < args->prod);
+       rlen = args->len;
+       ASSERT(rlen >= args->minlen);
+       ASSERT(rlen <= args->maxlen);
+       if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
+           (args->mod == 0 && rlen < args->prod))
+               return;
+       k = rlen % args->prod;
+       if (k == args->mod)
+               return;
+       /*
+        * Round down to the nearest value of the form n * prod + mod.
+        * When the remainder exceeds mod only the excess (k - mod) is
+        * dropped; otherwise step back a whole prod and add back the
+        * shortfall (mod - k).
+        */
+       if (k > args->mod)
+               rlen = rlen - (k - args->mod);
+       else
+               rlen = rlen - args->prod + (args->mod - k);
+       /* signed comparison kept from the original code */
+       if ((int)rlen < (int)args->minlen)
+               return;
+       ASSERT(rlen >= args->minlen);
+       ASSERT(rlen <= args->maxlen);
+       ASSERT(rlen % args->prod == args->mod);
+       args->len = rlen;
+}
+
+/*
+ * Make sure an allocation of args->len blocks still leaves at least
+ * args->minleft blocks in the a.g., counting free blocks and freelist
+ * blocks.  The length is shortened when needed; if that takes it below
+ * minlen, args->agbno is set to NULLAGBLOCK.
+ * Return 1 if ok, 0 if too little, should give up.
+ */
+STATIC int
+xfs_alloc_fix_minleft(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_agf_t       *hdr;           /* a.g. freelist header */
+       int             spare;          /* blocks left beyond minleft */
+
+       if (args->minleft != 0) {
+               hdr = XFS_BUF_TO_AGF(args->agbp);
+               spare = INT_GET(hdr->agf_freeblks, ARCH_CONVERT)
+                       + INT_GET(hdr->agf_flcount, ARCH_CONVERT)
+                       - args->len - args->minleft;
+               if (spare < 0) {
+                       /* spare is negative here, so this shrinks len */
+                       args->len += spare;
+                       if (args->len < args->minlen) {
+                               args->agbno = NULLAGBLOCK;
+                               return 0;
+                       }
+               }
+       }
+       return 1;
+}
+
+/*
+ * Update the two btrees, logically removing from freespace the extent
+ * starting at rbno, rlen blocks.  The extent is contained within the
+ * actual (current) free extent fbno for flen blocks.
+ * Flags are passed in indicating whether the cursors are set to the
+ * relevant records.
+ */
+STATIC int                             /* error code */
+xfs_alloc_fixup_trees(
+       xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
+       xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
+       xfs_agblock_t   fbno,           /* starting block of free extent */
+       xfs_extlen_t    flen,           /* length of free extent */
+       xfs_agblock_t   rbno,           /* starting block of returned extent */
+       xfs_extlen_t    rlen,           /* length of returned extent */
+       int             flags)          /* flags, XFSA_FIXUP_... */
+{
+       int             error;          /* error code */
+       int             i;              /* operation results */
+       xfs_agblock_t   nfbno1;         /* first new free startblock */
+       xfs_agblock_t   nfbno2;         /* second new free startblock */
+       xfs_extlen_t    nflen1;         /* first new free length */
+       xfs_extlen_t    nflen2;         /* second new free length */
+
+       /*
+        * Look up the record in the by-size tree if necessary.
+        */
+       if (flags & XFSA_FIXUP_CNT_OK) {
+#ifdef DEBUG
+               if (error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(
+                       i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+       } else {
+               if (error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       /*
+        * Look up the record in the by-block tree if necessary.
+        */
+       if (flags & XFSA_FIXUP_BNO_OK) {
+#ifdef DEBUG
+               if (error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(
+                       i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+       } else {
+               if (error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+#ifdef DEBUG
+       {
+               xfs_alloc_block_t       *bnoblock;
+               xfs_alloc_block_t       *cntblock;
+
+               if (bno_cur->bc_nlevels == 1 &&
+                   cnt_cur->bc_nlevels == 1) {
+                       bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
+                       cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
+                       XFS_WANT_CORRUPTED_RETURN(
+                               INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT));
+               }
+       }
+#endif
+       /*
+        * Deal with all four cases: the allocated record is contained
+        * within the freespace record, so we can have new freespace
+        * at either (or both) end, or no freespace remaining.
+        */
+       if (rbno == fbno && rlen == flen)
+               nfbno1 = nfbno2 = NULLAGBLOCK;
+       else if (rbno == fbno) {
+               nfbno1 = rbno + rlen;
+               nflen1 = flen - rlen;
+               nfbno2 = NULLAGBLOCK;
+       } else if (rbno + rlen == fbno + flen) {
+               nfbno1 = fbno;
+               nflen1 = flen - rlen;
+               nfbno2 = NULLAGBLOCK;
+       } else {
+               nfbno1 = fbno;
+               nflen1 = rbno - fbno;
+               nfbno2 = rbno + rlen;
+               nflen2 = (fbno + flen) - nfbno2;
+       }
+       /*
+        * Delete the entry from the by-size btree.
+        */
+       if (error = xfs_alloc_delete(cnt_cur, &i))
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       /*
+        * Add new by-size btree entry(s).
+        */
+       if (nfbno1 != NULLAGBLOCK) {
+               if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if (error = xfs_alloc_insert(cnt_cur, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       if (nfbno2 != NULLAGBLOCK) {
+               if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if (error = xfs_alloc_insert(cnt_cur, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       /*
+        * Fix up the by-block btree entry(s).
+        */
+       if (nfbno1 == NULLAGBLOCK) {
+               /*
+                * No remaining freespace, just delete the by-block tree entry.
+                */
+               if (error = xfs_alloc_delete(bno_cur, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       } else {
+               /*
+                * Update the by-block entry to start later|be shorter.
+                */
+               if (error = xfs_alloc_update(bno_cur, nfbno1, nflen1))
+                       return error;
+       }
+       if (nfbno2 != NULLAGBLOCK) {
+               /*
+                * 2 resulting free entries, need to add one.
+                */
+               if (error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if (error = xfs_alloc_insert(bno_cur, &i))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       return 0;
+}
+
+/*
+ * Read the free block array (AGFL) buffer of allocation group agno
+ * through the transaction and hand it back in *bpp.
+ * On a read error the error is returned and *bpp is not written.
+ */
+STATIC int                             /* error */
+xfs_alloc_read_agfl(
+       xfs_mount_t     *mp,            /* mount point structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_buf_t       **bpp)          /* buffer for the ag free block array */
+{
+       xfs_buf_t       *agflbp;        /* buffer that was read */
+       xfs_daddr_t     daddr;          /* disk address of the AGFL */
+       int             error;
+
+       ASSERT(agno != NULLAGNUMBER);
+       daddr = XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR);
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr, 1, 0,
+                       &agflbp);
+       if (error)
+               return error;
+       ASSERT(agflbp);
+       ASSERT(!XFS_BUF_GETERROR(agflbp));
+       XFS_BUF_SET_VTYPE_REF(agflbp, B_FS_AGFL, XFS_AGFL_REF);
+       *bpp = agflbp;
+       return 0;
+}
+
+#if defined(XFS_ALLOC_TRACE)
+/*
+ * Record an allocation call in the allocation trace buffer.
+ * The entry holds the caller's tag strings, the source line and the
+ * fields of the allocation argument structure.
+ */
+STATIC void
+xfs_alloc_trace_alloc(
+       char            *name,          /* function tag string */
+       char            *str,           /* additional string */
+       xfs_alloc_arg_t *args,          /* allocation argument structure */
+       int             line)           /* source line number */
+{
+       /* entry type in the low bits, source line shifted above it */
+       __psint_t       tag = XFS_ALLOC_KTRACE_ALLOC | (line << 16);
+       /* current and original allocation type packed in one word */
+       __psint_t       types = (((__psint_t)args->type) << 16) |
+                               (__psint_t)args->otype;
+       /* the four boolean fields, one bit each */
+       __psint_t       bits = (args->wasdel << 3) |
+                              (args->wasfromfl << 2) |
+                              (args->isfl << 1) |
+                              (args->userdata << 0);
+
+       ktrace_enter(xfs_alloc_trace_buf,
+               (void *)tag,
+               (void *)name,
+               (void *)str,
+               (void *)args->mp,
+               (void *)(__psunsigned_t)args->agno,
+               (void *)(__psunsigned_t)args->agbno,
+               (void *)(__psunsigned_t)args->minlen,
+               (void *)(__psunsigned_t)args->maxlen,
+               (void *)(__psunsigned_t)args->mod,
+               (void *)(__psunsigned_t)args->prod,
+               (void *)(__psunsigned_t)args->minleft,
+               (void *)(__psunsigned_t)args->total,
+               (void *)(__psunsigned_t)args->alignment,
+               (void *)(__psunsigned_t)args->len,
+               (void *)types,
+               (void *)bits);
+}
+
+/*
+ * Record a free call in the allocation trace buffer.
+ * Slots of the trace entry that a free does not use are passed as NULL.
+ */
+STATIC void
+xfs_alloc_trace_free(
+       char            *name,          /* function tag string */
+       char            *str,           /* additional string */
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_agblock_t   agbno,          /* a.g. relative block number */
+       xfs_extlen_t    len,            /* length of extent */
+       int             isfl,           /* set if is freelist allocation/free */
+       int             line)           /* source line number */
+{
+       /* entry type in the low bits, source line shifted above it */
+       __psint_t       tag = XFS_ALLOC_KTRACE_FREE | (line << 16);
+
+       ktrace_enter(xfs_alloc_trace_buf,
+               (void *)tag,
+               (void *)name,
+               (void *)str,
+               (void *)mp,
+               (void *)(__psunsigned_t)agno,
+               (void *)(__psunsigned_t)agbno,
+               (void *)(__psunsigned_t)len,
+               (void *)(__psint_t)isfl,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * Add an allocation trace entry for modifying an agf.
+ * The entry holds the caller's tag strings, the source line, the
+ * logging flags and the fields of the agf read via INT_GET.
+ */
+STATIC void
+xfs_alloc_trace_modagf(
+       char            *name,          /* function tag string */
+       char            *str,           /* additional string */
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_agf_t       *agf,           /* new agf value */
+       int             flags,          /* logging flags for agf */
+       int             line)           /* source line number */
+{
+       /*
+        * Every value is one argument of the single ktrace_enter call,
+        * so each INT_GET(...) must be followed by a comma, not a
+        * semicolon.
+        */
+       ktrace_enter(xfs_alloc_trace_buf,
+               (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
+               (void *)name,
+               (void *)str,
+               (void *)mp,
+               (void *)(__psint_t)flags,
+               (void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
+                                               ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
+                                               ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
+                                               ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
+                                               ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+               (void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT));
+}
+#endif /* XFS_ALLOC_TRACE */
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate a variable extent in the allocation group agno.
+ * args->type selects which of the by-size, near or exact allocators
+ * does the work.  When one of them produced an extent (args->agbno is
+ * not NULLAGBLOCK), the free block counts in the agf, the per-a.g.
+ * structure, the transaction and the superblock are adjusted here.
+ * Extent's length (args->len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent(
+       xfs_alloc_arg_t *args)  /* argument structure for allocation */
+{
+       xfs_agf_t       *agf;   /* allocation group freelist header */
+       int             error;
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_ag_vextent";
+       xfs_mount_t     *mp = args->mp;
+#endif
+       long            nblks;  /* allocated length as a signed value */
+
+       ASSERT(args->minlen > 0);
+       ASSERT(args->maxlen > 0);
+       ASSERT(args->minlen <= args->maxlen);
+       ASSERT(args->mod < args->prod);
+       ASSERT(args->alignment > 0);
+       /*
+        * Hand off to the allocator matching the requested type.
+        */
+       args->wasfromfl = 0;
+       switch (args->type) {
+       case XFS_ALLOCTYPE_THIS_AG:
+               error = xfs_alloc_ag_vextent_size(args);
+               break;
+       case XFS_ALLOCTYPE_NEAR_BNO:
+               error = xfs_alloc_ag_vextent_near(args);
+               break;
+       case XFS_ALLOCTYPE_THIS_BNO:
+               error = xfs_alloc_ag_vextent_exact(args);
+               break;
+       default:
+               ASSERT(0);
+               /* NOTREACHED */
+       }
+       if (error)
+               return error;
+       /*
+        * Nothing was allocated, so there is nothing to account for.
+        */
+       if (args->agbno == NULLAGBLOCK)
+               return 0;
+       /*
+        * The allocation worked: update the agf (and log it) and the
+        * superblock counters.
+        */
+       nblks = (long)args->len;
+       ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
+       ASSERT(!(args->wasfromfl) || !args->isfl);
+       ASSERT(args->agbno % args->alignment == 0);
+       if (!args->wasfromfl) {
+               agf = XFS_BUF_TO_AGF(args->agbp);
+               INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len));
+               xfs_trans_agblocks_delta(args->tp, -nblks);
+               args->pag->pagf_freeblks -= args->len;
+               ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+                       <= INT_GET(agf->agf_length, ARCH_CONVERT));
+               TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
+               xfs_alloc_log_agf(args->tp, args->agbp, XFS_AGF_FREEBLKS);
+       }
+       if (!args->isfl) {
+               if (args->wasdel)
+                       xfs_trans_mod_sb(args->tp,
+                               XFS_TRANS_SB_RES_FDBLOCKS, -nblks);
+               else
+                       xfs_trans_mod_sb(args->tp,
+                               XFS_TRANS_SB_FDBLOCKS, -nblks);
+       }
+       XFS_STATS_INC(xs_allocx);
+       XFS_STATS_ADD(xs_allocb, args->len);
+       return 0;
+}
+
+/*
+ * Allocate a variable extent at exactly agno/bno.
+ * Extent's length (returned in *len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_exact(
+       xfs_alloc_arg_t *args)  /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
+       xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
+       xfs_agblock_t   end;    /* end of allocated extent */
+       int             error;
+       xfs_agblock_t   fbno;   /* start block of found extent */
+       xfs_agblock_t   fend;   /* end block of found extent */
+       xfs_extlen_t    flen;   /* length of found extent */
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_ag_vextent_exact";
+#endif
+       int             i;      /* success/failure of operation */
+       xfs_agblock_t   maxend; /* end of maximal extent */
+       xfs_agblock_t   minend; /* end of minimal extent */
+       xfs_extlen_t    rlen;   /* length of returned extent */
+
+       ASSERT(args->alignment == 1);
+       /*
+        * Allocate/initialize a cursor for the by-number freespace btree.
+        */
+       bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_BNO, 0, 0);
+       /*
+        * Lookup bno and minlen in the btree (minlen is irrelevant, really).
+        * Look for the closest free block <= bno, it must contain bno
+        * if any free block does.
+        */
+       if (error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen,
+                       &i))
+               goto error0;
+       if (!i) {
+               /*
+                * Didn't find it, return null.
+                */
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+               args->agbno = NULLAGBLOCK;
+               return 0;
+       }
+       /*
+        * Grab the freespace record.
+        */
+       if (error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       ASSERT(fbno <= args->agbno);
+       minend = args->agbno + args->minlen;
+       maxend = args->agbno + args->maxlen;
+       fend = fbno + flen;
+       /* 
+        * Give up if the freespace isn't long enough for the minimum request.
+        */
+       if (fend < minend) {
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+               args->agbno = NULLAGBLOCK;
+               return 0;
+       }
+       /*
+        * End of extent will be smaller of the freespace end and the
+        * maximal requested end.
+        */
+       end = XFS_AGBLOCK_MIN(fend, maxend);
+       /*
+        * Fix the length according to mod and prod if given.
+        */
+       args->len = end - args->agbno;
+       xfs_alloc_fix_len(args);
+       if (!xfs_alloc_fix_minleft(args)) {
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+               return 0;
+       }
+       rlen = args->len;
+       ASSERT(args->agbno + rlen <= fend);
+       end = args->agbno + rlen;
+       /*
+        * We are allocating agbno for rlen [agbno .. end]
+        * Allocate/initialize a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT, 0, 0);
+       ASSERT(args->agbno + args->len <=
+               INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+                       ARCH_CONVERT));
+       if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 
+                       args->agbno, args->len, XFSA_FIXUP_BNO_OK)) {
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+               goto error0;
+       }
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       TRACE_ALLOC("normal", args);
+       args->wasfromfl = 0;
+       return 0;
+
+error0:
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       TRACE_ALLOC("error", args);
+       return error;
+}
+
+/*
+ * Allocate a variable extent near bno in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                             /* error */
+xfs_alloc_ag_vextent_near(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
+       xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_ag_vextent_near";
+#endif
+       xfs_agblock_t   gtbno;          /* start bno of right side entry */
+       xfs_agblock_t   gtbnoa;         /* aligned ... */
+       xfs_extlen_t    gtdiff;         /* difference to right side entry */
+       xfs_extlen_t    gtlen;          /* length of right side entry */
+       xfs_extlen_t    gtlena;         /* aligned ... */
+       xfs_agblock_t   gtnew;          /* useful start bno of right side */
+       int             error;          /* error code */
+       int             i;              /* result code, temporary */
+       int             j;              /* result code, temporary */
+       xfs_agblock_t   ltbno;          /* start bno of left side entry */
+       xfs_agblock_t   ltbnoa;         /* aligned ... */
+       xfs_extlen_t    ltdiff;         /* difference to left side entry */
+       /*REFERENCED*/
+       xfs_agblock_t   ltend;          /* end bno of left side entry */
+       xfs_extlen_t    ltlen;          /* length of left side entry */
+       xfs_extlen_t    ltlena;         /* aligned ... */
+       xfs_agblock_t   ltnew;          /* useful start bno of left side */
+       xfs_extlen_t    rlen;           /* length of returned extent */
+#if defined(DEBUG) && defined(__KERNEL__)
+       /*
+        * Randomly don't execute the first algorithm.
+        */
+       static int      seed;           /* randomizing seed value */
+       int             dofirst;        /* set to do first algorithm */
+       timespec_t      now;            /* current time */
+
+       if (!seed) {
+               nanotime(&now);
+               seed = (int)now.tv_sec ^ (int)now.tv_nsec;
+       }
+       dofirst = random() & 1;
+#endif
+       /*
+        * Get a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT, 0, 0);
+       ltlen = 0;
+       bno_cur_lt = bno_cur_gt = NULL;
+       /*
+        * See if there are any free extents as big as maxlen.
+        */
+       if (error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))
+               goto error0;
+       /*
+        * If none, then pick up the last entry in the tree unless the
+        * tree is empty.
+        */ 
+       if (!i) {
+               if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
+                               &ltlen, &i))
+                       goto error0;
+               if (i == 0 || ltlen == 0) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       return 0;
+               }
+               ASSERT(i == 1);
+       }
+       args->wasfromfl = 0;
+       /* 
+        * First algorithm.
+        * If the requested extent is large wrt the freespaces available
+        * in this a.g., then the cursor will be pointing to a btree entry
+        * near the right edge of the tree.  If it's in the last btree leaf
+        * block, then we just examine all the entries in that block
+        * that are big enough, and pick the best one.
+        * This is written as a while loop so we can break out of it,
+        * but we never loop back to the top.
+        */
+       while (xfs_btree_islastblock(cnt_cur, 0)) {
+               xfs_extlen_t    bdiff;
+               int             besti;
+               xfs_extlen_t    blen;
+               xfs_agblock_t   bnew;
+
+#if defined(DEBUG) && defined(__KERNEL__)
+               if (!dofirst)
+                       break;
+#endif
+               /*
+                * Start from the entry that lookup found, sequence through
+                * all larger free blocks.  If we're actually pointing at a
+                * record smaller than maxlen, go to the start of this block,
+                * and skip all those smaller than minlen.
+                */
+               if (ltlen || args->alignment > 1) {
+                       cnt_cur->bc_ptrs[0] = 1;
+                       do {
+                               if (error = xfs_alloc_get_rec(cnt_cur, &ltbno,
+                                               &ltlen, &i))
+                                       goto error0;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                               if (ltlen >= args->minlen)
+                                       break;
+                               if (error = xfs_alloc_increment(cnt_cur, 0, &i))
+                                       goto error0;
+                       } while (i);
+                       ASSERT(ltlen >= args->minlen);
+                       if (!i)
+                               break;
+               }
+               i = cnt_cur->bc_ptrs[0];
+               for (j = 1, blen = 0, bdiff = 0;
+                    !error && j && (blen < args->maxlen || bdiff > 0);
+                    error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+                       /*
+                        * For each entry, decide if it's better than
+                        * the previous best entry.
+                        */
+                       if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen,
+                                       &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (!xfs_alloc_compute_aligned(ltbno, ltlen,
+                                       args->alignment, args->minlen,
+                                       &ltbnoa, &ltlena))
+                               continue;
+                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       ASSERT(args->len >= args->minlen);
+                       if (args->len < blen)
+                               continue;
+                       ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+                               args->alignment, ltbno, ltlen, &ltnew);
+                       if (ltnew != NULLAGBLOCK &&
+                           (args->len > blen || ltdiff < bdiff)) {
+                               bdiff = ltdiff;
+                               bnew = ltnew;
+                               blen = args->len;
+                               besti = cnt_cur->bc_ptrs[0];
+                       }
+               }
+               /*
+                * It didn't work.  We COULD be in a case where
+                * there's a good record somewhere, so try again.
+                */
+               if (blen == 0)
+                       break;
+               /*
+                * Point at the best entry, and retrieve it again.
+                */
+               cnt_cur->bc_ptrs[0] = besti;
+               if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               ltend = ltbno + ltlen;
+               ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+                               ARCH_CONVERT));
+               args->len = blen;
+               if (!xfs_alloc_fix_minleft(args)) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       TRACE_ALLOC("nominleft", args);
+                       return 0;
+               }
+               blen = args->len;
+               /*
+                * We are allocating starting at bnew for blen blocks.
+                */
+               args->agbno = bnew;
+               ASSERT(bnew >= ltbno);
+               ASSERT(bnew + blen <= ltend);
+               /*
+                * Set up a cursor for the by-bno tree.
+                */
+               bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
+                       args->agbp, args->agno, XFS_BTNUM_BNO, 0, 0);
+               /*
+                * Fix up the btree entries.
+                */
+               if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
+                               ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))
+                       goto error0;
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+               TRACE_ALLOC("first", args);
+               return 0;
+       }
+       /*
+        * Second algorithm.
+        * Search in the by-bno tree to the left and to the right
+        * simultaneously, until in each case we find a space big enough,
+        * or run into the edge of the tree.  When we run into the edge,
+        * we deallocate that cursor.
+        * If both searches succeed, we compare the two spaces and pick
+        * the better one.
+        * With alignment, it's possible for both to fail; the upper
+        * level algorithm that picks allocation groups for allocations
+        * is not supposed to do this.
+        */
+       /*
+        * Allocate and initialize the cursor for the leftward search.
+        */
+       bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_BNO, 0, 0);
+       /*
+        * Lookup <= bno to find the leftward search's starting point.
+        */
+       if (error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen,
+                       &i))
+               goto error0;
+       if (!i) {
+               /*
+                * Didn't find anything; use this cursor for the rightward
+                * search.
+                */
+               bno_cur_gt = bno_cur_lt;
+               bno_cur_lt = 0;
+       }
+       /*
+        * Found something.  Duplicate the cursor for the rightward search.
+        */
+       else if (error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))
+               goto error0;
+       /*
+        * Increment the cursor, so we will point at the entry just right
+        * of the leftward entry if any, or to the leftmost entry.
+        */
+       if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+               goto error0;
+       if (!i) {
+               /*
+                * It failed, there are no rightward entries.
+                */
+               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
+               bno_cur_gt = NULL;
+       }
+       /*
+        * Loop going left with the leftward cursor, right with the
+        * rightward cursor, until either both directions give up or
+        * we find an entry at least as big as minlen.
+        */
+       do {
+               if (bno_cur_lt) {
+                       if (error = xfs_alloc_get_rec(bno_cur_lt, &ltbno,
+                                       &ltlen, &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (xfs_alloc_compute_aligned(ltbno, ltlen,
+                                       args->alignment, args->minlen,
+                                       &ltbnoa, &ltlena))
+                               break;
+                       if (error = xfs_alloc_decrement(bno_cur_lt, 0, &i))
+                               goto error0;
+                       if (!i) {
+                               xfs_btree_del_cursor(bno_cur_lt,
+                                                    XFS_BTREE_NOERROR);
+                               bno_cur_lt = NULL;
+                       }
+               }
+               if (bno_cur_gt) {
+                       if (error = xfs_alloc_get_rec(bno_cur_gt, &gtbno,
+                                       &gtlen, &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (xfs_alloc_compute_aligned(gtbno, gtlen,
+                                       args->alignment, args->minlen,
+                                       &gtbnoa, &gtlena))
+                               break;
+                       if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+                               goto error0;
+                       if (!i) {
+                               xfs_btree_del_cursor(bno_cur_gt,
+                                                    XFS_BTREE_NOERROR);
+                               bno_cur_gt = NULL;
+                       }
+               }
+       } while (bno_cur_lt || bno_cur_gt);
+       /*
+        * Got both cursors still active, need to find better entry.
+        */
+       if (bno_cur_lt && bno_cur_gt) {
+               /*
+                * Left side is long enough, look for a right side entry.
+                */
+               if (ltlena >= args->minlen) {
+                       /*
+                        * Fix up the length.
+                        */
+                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       rlen = args->len;
+                       ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+                               args->alignment, ltbno, ltlen, &ltnew);
+                       /*
+                        * Not perfect.
+                        */
+                       if (ltdiff) {
+                               /*
+                                * Look until we find a better one, run out of
+                                * space, or run off the end.
+                                */
+                               while (bno_cur_lt && bno_cur_gt) {
+                                       if (error = xfs_alloc_get_rec(
+                                                       bno_cur_gt, &gtbno,
+                                                       &gtlen, &i))
+                                               goto error0;
+                                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                                       xfs_alloc_compute_aligned(gtbno, gtlen,
+                                               args->alignment, args->minlen,
+                                               &gtbnoa, &gtlena);
+                                       /*
+                                        * The left one is clearly better.
+                                        */
+                                       if (gtbnoa >= args->agbno + ltdiff) {
+                                               xfs_btree_del_cursor(
+                                                       bno_cur_gt,
+                                                       XFS_BTREE_NOERROR);
+                                               bno_cur_gt = NULL;
+                                               break;
+                                       }
+                                       /*
+                                        * If we reach a big enough entry,
+                                        * compare the two and pick the best.
+                                        */
+                                       if (gtlena >= args->minlen) {
+                                               args->len =
+                                                       XFS_EXTLEN_MIN(gtlena,
+                                                               args->maxlen);
+                                               xfs_alloc_fix_len(args);
+                                               rlen = args->len;
+                                               gtdiff = xfs_alloc_compute_diff(
+                                                       args->agbno, rlen,
+                                                       args->alignment,
+                                                       gtbno, gtlen, &gtnew);
+                                               /*
+                                                * Right side is better.
+                                                */
+                                               if (gtdiff < ltdiff) {
+                                                       xfs_btree_del_cursor(
+                                                               bno_cur_lt,
+                                                               XFS_BTREE_NOERROR);
+                                                       bno_cur_lt = NULL;
+                                               }
+                                               /*
+                                                * Left side is better.
+                                                */
+                                               else {
+                                                       xfs_btree_del_cursor(
+                                                               bno_cur_gt,
+                                                               XFS_BTREE_NOERROR);
+                                                       bno_cur_gt = NULL;
+                                               }
+                                               break;
+                                       }
+                                       /*
+                                        * Fell off the right end.
+                                        */
+                                       if (error = xfs_alloc_increment(
+                                                       bno_cur_gt, 0, &i))
+                                               goto error0;
+                                       if (!i) {
+                                               xfs_btree_del_cursor(
+                                                       bno_cur_gt,
+                                                       XFS_BTREE_NOERROR);
+                                               bno_cur_gt = NULL;
+                                               break;
+                                       }
+                               }
+                       }
+                       /*
+                        * The left side is perfect, trash the right side.
+                        */
+                       else {
+                               xfs_btree_del_cursor(bno_cur_gt,
+                                                    XFS_BTREE_NOERROR);
+                               bno_cur_gt = NULL;
+                       }
+               }
+               /*
+                * It's the right side that was found first, look left.
+                */
+               else {
+                       /*
+                        * Fix up the length.
+                        */
+                       args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       rlen = args->len;
+                       gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+                               args->alignment, gtbno, gtlen, &gtnew);
+                       /*
+                        * Right side entry isn't perfect.
+                        */
+                       if (gtdiff) {
+                               /*
+                                * Look until we find a better one, run out of
+                                * space, or run off the end.
+                                */
+                               while (bno_cur_lt && bno_cur_gt) {
+                                       if (error = xfs_alloc_get_rec(
+                                                       bno_cur_lt, &ltbno,
+                                                       &ltlen, &i))
+                                               goto error0;
+                                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                                       xfs_alloc_compute_aligned(ltbno, ltlen,
+                                               args->alignment, args->minlen,
+                                               &ltbnoa, &ltlena);
+                                       /*
+                                        * The right one is clearly better.
+                                        */
+                                       if (ltbnoa <= args->agbno - gtdiff) {
+                                               xfs_btree_del_cursor(
+                                                       bno_cur_lt,
+                                                       XFS_BTREE_NOERROR);
+                                               bno_cur_lt = NULL;
+                                               break;
+                                       }
+                                       /*
+                                        * If we reach a big enough entry,
+                                        * compare the two and pick the best.
+                                        */
+                                       if (ltlena >= args->minlen) {
+                                               args->len = XFS_EXTLEN_MIN(
+                                                       ltlena, args->maxlen);
+                                               xfs_alloc_fix_len(args);
+                                               rlen = args->len;
+                                               ltdiff = xfs_alloc_compute_diff(
+                                                       args->agbno, rlen,
+                                                       args->alignment,
+                                                       ltbno, ltlen, &ltnew);
+                                               /*
+                                                * Left side is better.
+                                                */
+                                               if (ltdiff < gtdiff) {
+                                                       xfs_btree_del_cursor(
+                                                               bno_cur_gt,
+                                                               XFS_BTREE_NOERROR);
+                                                       bno_cur_gt = NULL;
+                                               }
+                                               /*
+                                                * Right side is better.
+                                                */
+                                               else {
+                                                       xfs_btree_del_cursor(
+                                                               bno_cur_lt,
+                                                               XFS_BTREE_NOERROR);
+                                                       bno_cur_lt = NULL;
+                                               }
+                                               break;
+                                       }
+                                       /*
+                                        * Fell off the left end.
+                                        */
+                                       if (error = xfs_alloc_decrement(
+                                                       bno_cur_lt, 0, &i))
+                                               goto error0;
+                                       if (!i) {
+                                               xfs_btree_del_cursor(bno_cur_lt,
+                                                       XFS_BTREE_NOERROR);
+                                               bno_cur_lt = NULL;
+                                               break;
+                                       }
+                               }
+                       }
+                       /*
+                        * The right side is perfect, trash the left side.
+                        */
+                       else {
+                               xfs_btree_del_cursor(bno_cur_lt,
+                                       XFS_BTREE_NOERROR);
+                               bno_cur_lt = NULL;
+                       }
+               }
+       }
+       /*
+        * If we couldn't get anything, give up.
+        */
+       if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+               TRACE_ALLOC("neither", args);
+               args->agbno = NULLAGBLOCK;
+               return 0;
+       }
+       /*
+        * At this point we have selected a freespace entry, either to the
+        * left or to the right.  If it's on the right, copy all the
+        * useful variables to the "left" set so we only have one
+        * copy of this code.
+        */
+       if (bno_cur_gt) {
+               bno_cur_lt = bno_cur_gt;
+               bno_cur_gt = NULL;
+               ltbno = gtbno;
+               ltbnoa = gtbnoa;
+               ltlen = gtlen;
+               ltlena = gtlena;
+               j = 1;
+       } else
+               j = 0;
+       /*
+        * Fix up the length and compute the useful address.
+        */
+       ltend = ltbno + ltlen;
+       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+       xfs_alloc_fix_len(args);
+       if (!xfs_alloc_fix_minleft(args)) {
+               TRACE_ALLOC("nominleft", args);
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+               return 0;
+       }
+       rlen = args->len;
+       (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
+               ltlen, &ltnew);
+       ASSERT(ltnew >= ltbno);
+       ASSERT(ltnew + rlen <= ltend);
+       ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+               ARCH_CONVERT));
+       args->agbno = ltnew;
+       if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
+                       ltnew, rlen, XFSA_FIXUP_BNO_OK))
+               goto error0;
+       TRACE_ALLOC(j ? "gt" : "lt", args);
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+       return 0;
+
+ error0:
+       TRACE_ALLOC("error", args);
+       if (cnt_cur != NULL)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       if (bno_cur_lt != NULL)
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
+       if (bno_cur_gt != NULL)
+               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Allocate a variable extent anywhere in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                             /* error */
+xfs_alloc_ag_vextent_size(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
+       int             error;          /* error result */
+       xfs_agblock_t   fbno;           /* start of found freespace */
+       xfs_extlen_t    flen;           /* length of found freespace */
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_ag_vextent_size";
+#endif
+       int             i;              /* temp status variable */
+       xfs_agblock_t   rbno;           /* returned block number */
+       xfs_extlen_t    rlen;           /* length of returned extent */
+
+       /*
+        * Allocate and initialize a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT, 0, 0);
+       bno_cur = NULL;
+       /*
+        * Look for an entry >= maxlen+alignment-1 blocks.
+        */
+       if (error = xfs_alloc_lookup_ge(cnt_cur, 0,
+                       args->maxlen + args->alignment - 1, &i))
+               goto error0;
+       /*
+        * If none, then pick up the last entry in the tree unless the
+        * tree is empty.
+        */ 
+       if (!i) {
+               if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
+                               &flen, &i))
+                       goto error0;
+               if (i == 0 || flen == 0) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       TRACE_ALLOC("noentry", args);
+                       return 0;
+               }
+               ASSERT(i == 1);
+       }
+       /*
+        * There's a freespace as big as maxlen+alignment-1, get it.
+        */
+       else {
+               if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+       /*
+        * In the first case above, we got the last entry in the
+        * by-size btree.  Now we check to see if the space hits maxlen
+        * once aligned; if not, we search left for something better.
+        * This can't happen in the second case above.
+        */
+       xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
+               &rbno, &rlen);
+       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+       XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 
+                       (rlen <= flen && rbno + rlen <= fbno + flen), error0);
+       if (rlen < args->maxlen) {
+               xfs_agblock_t   bestfbno;
+               xfs_extlen_t    bestflen;
+               xfs_agblock_t   bestrbno;
+               xfs_extlen_t    bestrlen;
+
+               bestrlen = rlen;
+               bestrbno = rbno;
+               bestflen = flen;
+               bestfbno = fbno;
+               for (;;) {
+                       if (error = xfs_alloc_decrement(cnt_cur, 0, &i))
+                               goto error0;
+                       if (i == 0)
+                               break;
+                       if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
+                                       &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (flen < bestrlen)
+                               break;
+                       xfs_alloc_compute_aligned(fbno, flen, args->alignment,
+                               args->minlen, &rbno, &rlen);
+                       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+                       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+                               (rlen <= flen && rbno + rlen <= fbno + flen),
+                               error0);
+                       if (rlen > bestrlen) {
+                               bestrlen = rlen;
+                               bestrbno = rbno;
+                               bestflen = flen;
+                               bestfbno = fbno;
+                               if (rlen == args->maxlen)
+                                       break;
+                       }
+               } 
+               if (error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
+                               &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               rlen = bestrlen;
+               rbno = bestrbno;
+               flen = bestflen;
+               fbno = bestfbno;
+       }
+       args->wasfromfl = 0;
+       /*
+        * Fix up the length.
+        */
+       args->len = rlen;
+       xfs_alloc_fix_len(args);
+       if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+               TRACE_ALLOC("nominleft", args);
+               args->agbno = NULLAGBLOCK;
+               return 0;
+       }
+       rlen = args->len;
+       XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+       /*
+        * Allocate and initialize a cursor for the by-block tree.
+        */
+       bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_BNO, 0, 0);
+       if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
+                       rbno, rlen, XFSA_FIXUP_CNT_OK))
+               goto error0;
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       cnt_cur = bno_cur = NULL;
+       args->len = rlen;
+       args->agbno = rbno;
+       XFS_WANT_CORRUPTED_GOTO(
+               args->agbno + args->len <=
+                       INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+                       ARCH_CONVERT),
+               error0);
+       TRACE_ALLOC("normal", args);
+       return 0;
+
+error0:
+       TRACE_ALLOC("error", args);
+       if (cnt_cur)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       if (bno_cur)
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Deal with the case where only small freespaces remain.
+ * Either return the contents of the last freespace record,
+ * or allocate space from the freelist if there is nothing in the tree.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_small(
+       xfs_alloc_arg_t *args,  /* allocation argument structure */
+       xfs_btree_cur_t *ccur,  /* by-size cursor */
+       xfs_agblock_t   *fbnop, /* result block number */
+       xfs_extlen_t    *flenp, /* result length */
+       int             *stat)  /* status: 0-freelist, 1-normal/none */
+{
+       int             error;
+       xfs_agblock_t   fbno;
+       xfs_extlen_t    flen;
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_ag_vextent_small";
+#endif
+       int             i;
+
+       if (error = xfs_alloc_decrement(ccur, 0, &i))
+               goto error0;
+       if (i) {
+               if (error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+       /*
+        * Nothing in the btree, try the freelist.  Make sure
+        * to respect minleft even when pulling from the
+        * freelist.
+        */
+       else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
+                (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount,
+                       ARCH_CONVERT) > args->minleft)) {
+               if (error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno))
+                       goto error0;
+               if (fbno != NULLAGBLOCK) {
+                       if (args->userdata) {
+                               xfs_buf_t       *bp;
+
+                               bp = xfs_btree_get_bufs(args->mp, args->tp,
+                                       args->agno, fbno, 0);
+                               xfs_trans_binval(args->tp, bp);
+                               /*
+                                * Since blocks move to the free list without
+                                * the coordination used in xfs_bmap_finish,
+                                * we can't allow the user to write to the
+                                * block until we know that the transaction
+                                * that moved it to the free list is
+                                * permanently on disk.  The only way to
+                                * ensure that is to make this transaction
+                                * synchronous.
+                                */
+                               xfs_trans_set_sync(args->tp);
+                       }
+                       args->len = 1;
+                       args->agbno = fbno;
+                       XFS_WANT_CORRUPTED_GOTO(
+                               args->agbno + args->len <=
+                               INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+                                       ARCH_CONVERT),
+                               error0);
+                       args->wasfromfl = 1;
+                       TRACE_ALLOC("freelist", args);
+                       *stat = 0;
+                       return 0;
+               }
+               /*
+                * Nothing in the freelist.
+                */
+               else
+                       flen = 0;
+       }
+       /*
+        * Can't allocate from the freelist for some reason.
+        */
+       else
+               flen = 0;
+       /*
+        * Can't do the allocation, give up.
+        */
+       if (flen < args->minlen) {
+               args->agbno = NULLAGBLOCK;
+               TRACE_ALLOC("notenough", args);
+               flen = 0;
+       }
+       *fbnop = fbno;
+       *flenp = flen;
+       *stat = 1;
+       TRACE_ALLOC("normal", args);
+       return 0;
+
+error0:
+       TRACE_ALLOC("error", args);
+       return error;
+}
+
+/*
+ * Free the extent starting at agno/bno for length.
+ *
+ * The freed range is merged with a free extent that ends exactly at bno
+ * and/or one that starts exactly at bno + len, so that the by-block and
+ * by-size btrees each end up with one record (nbno, nlen) covering the
+ * combined space.  The AGF and in-core per-ag free block counts are then
+ * increased by len; the superblock free block count is only adjusted
+ * when isfl is zero.
+ *
+ * Returns 0 on success.  On the error0 path any cursor that is still
+ * held is torn down with XFS_BTREE_ERROR and the value of error is
+ * returned.
+ */
+STATIC int                     /* error */
+xfs_free_ag_extent(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
+       xfs_agnumber_t  agno,   /* allocation group number */
+       xfs_agblock_t   bno,    /* starting block number */
+       xfs_extlen_t    len,    /* length of extent */
+       int             isfl)   /* set if is freelist blocks - no sb acctg */
+{
+       xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
+       int             error;          /* error return value */
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_free_ag_extent";
+#endif
+       xfs_agblock_t   gtbno;          /* start of right neighbor block */
+       xfs_extlen_t    gtlen;          /* length of right neighbor block */
+       int             haveleft;       /* have a left neighbor block */
+       int             haveright;      /* have a right neighbor block */
+       int             i;              /* temp, result code */
+       xfs_agblock_t   ltbno;          /* start of left neighbor block */
+       xfs_extlen_t    ltlen;          /* length of left neighbor block */
+       xfs_mount_t     *mp;            /* mount point struct for filesystem */
+       xfs_agblock_t   nbno;           /* new starting block of freespace */
+       xfs_extlen_t    nlen;           /* new length of freespace */
+
+       mp = tp->t_mountp;
+       /*
+        * Allocate and initialize a cursor for the by-block btree.
+        * cnt_cur is cleared for now so that the error0 path, which only
+        * deletes non-NULL cursors, skips it until it has been set up.
+        */
+       bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, 0,
+               0);
+       cnt_cur = NULL;
+       /*
+        * Look for a neighboring block on the left (lower block numbers)
+        * that is contiguous with this space.
+        */
+       if (error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))
+               goto error0;
+       if (haveleft) {
+               /*
+                * There is a block to our left.
+                */
+               if (error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * The left neighbor ends short of bno, so it's not
+                * contiguous, though.
+                */
+               if (ltbno + ltlen < bno)
+                       haveleft = 0;
+               else {
+                       /*
+                        * If this failure happens the request to free this
+                        * space was invalid, it's (partly) already free.
+                        * Very bad.
+                        * Passing the check means ltbno + ltlen == bno,
+                        * i.e. the left neighbor ends exactly where the
+                        * freed extent starts.
+                        */
+                       XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+               }
+       }
+       /*
+        * Look for a neighboring block on the right (higher block numbers)
+        * that is contiguous with this space.
+        * The by-block cursor is stepped one record forward from where the
+        * lookup above left it.
+        */
+       if (error = xfs_alloc_increment(bno_cur, 0, &haveright))
+               goto error0;
+       if (haveright) {
+               /*
+                * There is a block to our right.
+                */
+               if (error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * The right neighbor starts beyond bno + len, so it's not
+                * contiguous, though.
+                */
+               if (bno + len < gtbno)
+                       haveright = 0;
+               else {
+                       /*
+                        * If this failure happens the request to free this
+                        * space was invalid, it's (partly) already free.
+                        * Very bad.
+                        * Passing the check means gtbno == bno + len,
+                        * i.e. the right neighbor starts exactly where the
+                        * freed extent ends.
+                        */
+                       XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+               }
+       }
+       /*
+        * Now allocate and initialize a cursor for the by-size tree.
+        */
+       cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, 0,
+               0);
+       /*
+        * Have both left and right contiguous neighbors.
+        * Merge all three into a single free block.
+        */
+       if (haveleft && haveright) {
+               /*
+                * Delete the old by-size entry on the left.
+                */
+               if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_delete(cnt_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Delete the old by-size entry on the right.
+                */
+               if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_delete(cnt_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Delete the old by-block entry for the right block.
+                */
+               if (error = xfs_alloc_delete(bno_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Move the by-block cursor back to the left neighbor.
+                */
+               if (error = xfs_alloc_decrement(bno_cur, 0, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+#ifdef DEBUG
+               /*
+                * Check that this is the right record: delete didn't
+                * mangle the cursor.
+                */
+               {
+                       xfs_agblock_t   xxbno;
+                       xfs_extlen_t    xxlen;
+
+                       if (error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
+                                       &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(
+                               i == 1 && xxbno == ltbno && xxlen == ltlen,
+                               error0);
+               }
+#endif
+               /*
+                * Update remaining by-block entry to the new, joined block.
+                */
+               nbno = ltbno;
+               nlen = len + ltlen + gtlen;
+               if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+                       goto error0;
+       }
+       /*
+        * Have only a left contiguous neighbor.
+        * Merge it together with the new freespace.
+        */
+       else if (haveleft) {
+               /*
+                * Delete the old by-size entry on the left.
+                */
+               if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_delete(cnt_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Back up the by-block cursor to the left neighbor, and
+                * update its length.
+                */
+               if (error = xfs_alloc_decrement(bno_cur, 0, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               nbno = ltbno;
+               nlen = len + ltlen;
+               if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+                       goto error0;
+       }
+       /*
+        * Have only a right contiguous neighbor.
+        * Merge it together with the new freespace.
+        */
+       else if (haveright) {
+               /*
+                * Delete the old by-size entry on the right.
+                */
+               if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_delete(cnt_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Update the starting block and length of the right
+                * neighbor in the by-block tree.
+                */
+               nbno = bno;
+               nlen = len + gtlen;
+               if (error = xfs_alloc_update(bno_cur, nbno, nlen))
+                       goto error0;
+       }
+       /*
+        * No contiguous neighbors.
+        * Insert the new freespace into the by-block tree.
+        */
+       else {
+               nbno = bno;
+               nlen = len;
+               if (error = xfs_alloc_insert(bno_cur, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       bno_cur = NULL;
+       /*
+        * In all cases we need to insert the new freespace in the by-size tree.
+        * The lookup is expected to find no existing (nbno, nlen) record
+        * (i == 0) before the insert is done.
+        */
+       if (error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+       if (error = xfs_alloc_insert(cnt_cur, &i))
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       cnt_cur = NULL;
+       /*
+        * Update the freespace totals in the ag and superblock.
+        * Both cursors are NULL by now, so a failure of the sanity check
+        * below reaches error0 without deleting either of them again.
+        * The superblock count is left alone for freelist blocks (isfl).
+        */
+       {
+               xfs_agf_t       *agf;
+               xfs_perag_t     *pag;           /* per allocation group data */
+
+               agf = XFS_BUF_TO_AGF(agbp);
+               pag = &mp->m_perag[agno];
+               INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len);
+               xfs_trans_agblocks_delta(tp, len);
+               pag->pagf_freeblks += len;
+               XFS_WANT_CORRUPTED_GOTO(
+                       INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+                               <= INT_GET(agf->agf_length, ARCH_CONVERT),
+                       error0);
+               TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
+               xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
+               if (!isfl)
+                       xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
+               XFS_STATS_INC(xs_freex);
+               XFS_STATS_ADD(xs_freeb, len);
+       }
+       TRACE_FREE(haveleft ?
+                       (haveright ? "both" : "left") :
+                       (haveright ? "right" : "none"),
+               agno, bno, len, isfl);
+       return 0;
+
+ error0:
+       TRACE_FREE("error", agno, bno, len, isfl);
+       if (bno_cur)
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       if (cnt_cur)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/* 
+ * Visible (exported) allocation/free functions.
+ * Some of these are used just by xfs_alloc_btree.c and this file.
+ */
+
+/*
+ * Derive mp->m_ag_maxlevels from the a.g. size (sb_agblocks) and the
+ * minimum leaf/node record counts kept in mp->m_alloc_mnr[].
+ * Starting from the leaf level, each pass divides the block count by the
+ * minimum node record count (rounding up) until a single block is left;
+ * the number of levels counted that way is stored in the mount structure.
+ */
+void
+xfs_alloc_compute_maxlevels(
+       xfs_mount_t     *mp)    /* file system mount structure */
+{
+       uint            leafents;       /* (sb_agblocks + 1) / 2 */
+       int             leafmin;        /* m_alloc_mnr[0]: min leaf records */
+       int             nodemin;        /* m_alloc_mnr[1]: min node records */
+       uint            nblocks;        /* blocks at the level being counted */
+       int             height;         /* levels counted so far */
+
+       leafmin = mp->m_alloc_mnr[0];
+       nodemin = mp->m_alloc_mnr[1];
+       leafents = (mp->m_sb.sb_agblocks + 1) / 2;
+       /*
+        * Blocks needed for the leaf level, rounded up.
+        */
+       nblocks = (leafents + leafmin - 1) / leafmin;
+       height = 1;
+       while (nblocks > 1) {
+               nblocks = (nblocks + nodemin - 1) / nodemin;
+               height++;
+       }
+       mp->m_ag_maxlevels = height;
+}
+
+/*
+ * Decide whether to use this allocation group for this allocation.
+ * If so, fix up the btree freelist's size.
+ * This is external so mkfs can call it, too.
+ *
+ * On return args->agbp is the AGF buffer when the a.g. was accepted, or
+ * NULL when it was rejected (not enough space, or no AGF buffer could be
+ * obtained); both of those outcomes return 0.  A nonzero return value is
+ * an error handed back by one of the routines called here.
+ */
+int                            /* error */
+xfs_alloc_fix_freelist(
+       xfs_alloc_arg_t *args,  /* allocation argument structure */
+       int             flags)  /* XFS_ALLOC_FLAG_... */
+{
+       xfs_buf_t       *agbp;  /* agf buffer pointer */
+       xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
+       xfs_buf_t       *agflbp;/* agfl buffer pointer */
+       xfs_agblock_t   bno;    /* freelist block */
+       xfs_extlen_t    delta;  /* new blocks needed in freelist */
+       int             error;  /* error result code */
+       xfs_extlen_t    longest;/* longest extent in allocation group */
+       xfs_mount_t     *mp;    /* file system mount point structure */
+       xfs_extlen_t    need;   /* total blocks needed in freelist */
+       xfs_perag_t     *pag;   /* per-ag information structure */
+       xfs_alloc_arg_t targs;  /* local allocation arguments */
+       xfs_trans_t     *tp;    /* transaction pointer */
+
+       mp = args->mp;
+
+       pag = args->pag;
+       tp = args->tp;
+       if (!pag->pagf_init) {
+               if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+                               &agbp))
+                       return error;
+               if (!pag->pagf_init) {
+                       args->agbp = NULL;
+                       return 0;
+               }
+       } else
+               agbp = NULL;    /* read later, once the in-core check passes */
+       need = XFS_MIN_FREELIST_PAG(pag, mp);
+       delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+       /*
+        * If it looks like there isn't a long enough extent, or enough
+        * total blocks, reject it.
+        * This first check uses only the in-core per-ag counters, so it
+        * can run while agbp is still NULL.
+        */
+       longest = (pag->pagf_longest > delta) ?
+               (pag->pagf_longest - delta) :
+               (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+       if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+           (args->minleft &&
+            (int)(pag->pagf_freeblks + pag->pagf_flcount -
+                  need - args->total) <
+            (int)args->minleft)) {
+               if (agbp)
+                       xfs_trans_brelse(tp, agbp);
+               args->agbp = NULL;
+               return 0;
+       }
+       /*
+        * Get the a.g. freespace buffer.
+        * Can fail if we're not blocking on locks, and it's held.
+        */
+       if (agbp == NULL) {
+               if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+                               &agbp))
+                       return error;
+               if (agbp == NULL) {
+                       args->agbp = NULL;
+                       return 0;
+               }
+       }
+       /*
+        * Figure out how many blocks we should have in the freelist.
+        */
+       agf = XFS_BUF_TO_AGF(agbp);
+       need = XFS_MIN_FREELIST(agf, mp);
+       delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ?
+               (need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0;
+       /*
+        * If there isn't enough total or single-extent, reject it.
+        * This is the same test as above, repeated with the values held
+        * in the AGF buffer itself.
+        */
+       longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+       longest = (longest > delta) ? (longest - delta) :
+               (INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0);
+       if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+            (args->minleft &&
+               (int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) +
+                  INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) <
+            (int)args->minleft)) {
+               xfs_trans_brelse(tp, agbp);
+               args->agbp = NULL;
+               return 0;
+       }
+       /*
+        * Make the freelist shorter if it's too long.
+        * Each surplus block is taken off the list, freed back into the
+        * free space btrees as a one-block extent with isfl set, and its
+        * buffer is invalidated in the transaction.
+        */
+       while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) {
+               xfs_buf_t       *bp;
+
+               if (error = xfs_alloc_get_freelist(tp, agbp, &bno))
+                       return error;
+               if (error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))
+                       return error;
+               bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+               xfs_trans_binval(tp, bp);
+               /*
+                * Since blocks move to the free list without
+                * the coordination used in xfs_bmap_finish,
+                * we can't allow block to be available for reallocation
+                * and non-transaction writing (user data)
+                * until we know that the transaction
+                * that moved it to the free list is
+                * permanently on disk.  The only way to
+                * ensure that is to make this transaction
+                * synchronous.  The one exception to this
+                * is in the case of wsync-mounted filesystem
+                * where we know that any block that made it
+                * onto the freelist won't be seen again in
+                * the file from which it came since the transactions
+                * that free metadata blocks or shrink inodes in
+                * wsync filesystems are all themselves synchronous.
+                */
+               if (!(mp->m_flags & XFS_MOUNT_WSYNC))
+                       xfs_trans_set_sync(tp);
+       }
+       /*
+        * Initialize the args structure.
+        * targs describes a request confined to this a.g.
+        * (XFS_ALLOCTYPE_THIS_AG) with alignment, minlen and prod of 1 and
+        * isfl set; agbno and maxlen are filled in on each pass of the
+        * loop below.
+        */
+       targs.tp = tp;
+       targs.mp = mp;
+       targs.agbp = agbp;
+       targs.agno = args->agno;
+       targs.mod = targs.minleft = targs.wasdel = targs.userdata =
+               targs.minalignslop = 0;
+       targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
+       targs.type = XFS_ALLOCTYPE_THIS_AG;
+       targs.pag = pag;
+       if (error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))
+               return error;
+       /*
+        * Make the freelist longer if it's too short.
+        */
+       while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) {
+               targs.agbno = 0;
+               targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT);
+               /*
+                * Allocate as many blocks as possible at once.
+                */
+               if (error = xfs_alloc_ag_vextent(&targs))
+                       return error;
+               /*
+                * Stop if we run out.  Won't happen if callers are obeying
+                * the restrictions correctly.  Can happen for free calls
+                * on a completely full ag.
+                */
+               if (targs.agbno == NULLAGBLOCK)
+                       break;
+               /*
+                * Put each allocated block on the list.
+                */
+               for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
+                       if (error = xfs_alloc_put_freelist(tp, agbp, agflbp,
+                                       bno))
+                               return error;
+               }
+       }
+       args->agbp = agbp;
+       return 0;
+}
+
+/*
+ * Get a block from the freelist.
+ * The block number stored at agf_flfirst is handed back in *bnop; if the
+ * freelist is empty *bnop is set to NULLAGBLOCK and 0 is returned.
+ * Only the block number is returned -- no buffer for the block itself is
+ * read or passed back to the caller.
+ */
+int                            /* error */
+xfs_alloc_get_freelist(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t       *agbp,  /* buffer containing the agf structure */
+       xfs_agblock_t   *bnop)  /* block address retrieved from freelist */
+{
+       xfs_agf_t       *agf;   /* a.g. freespace structure */
+       xfs_agfl_t      *agfl;  /* a.g. freelist structure */
+       xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
+       xfs_agblock_t   bno;    /* block number returned */
+       int             error;
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_get_freelist";
+#endif
+       xfs_mount_t     *mp;    /* mount structure */
+       xfs_perag_t     *pag;   /* per allocation group data */
+
+       agf = XFS_BUF_TO_AGF(agbp);
+       /*
+        * Freelist is empty, give up.
+        */
+       if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0) {
+               *bnop = NULLAGBLOCK;
+               return 0;
+       }
+       /*
+        * Read the array of free blocks.
+        */
+       mp = tp->t_mountp;
+       if (error = xfs_alloc_read_agfl(mp, tp,
+                       INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp))
+               return error;
+       agfl = XFS_BUF_TO_AGFL(agflbp);
+       /*
+        * Get the block number and update the data structures.
+        * agf_flfirst is advanced by one and wraps to 0 when it reaches
+        * XFS_AGFL_SIZE.  The AGFL buffer is only read from here, so it is
+        * released rather than logged.  The AGF flcount, the transaction's
+        * agflist delta and the in-core pagf_flcount each drop by one, and
+        * the changed AGF fields are logged.
+        */
+       bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT);
+       INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1);
+       xfs_trans_brelse(tp, agflbp);
+       if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE)
+               INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+       pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
+       INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1);
+       xfs_trans_agflist_delta(tp, -1);
+       pag->pagf_flcount--;
+       TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+       xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
+       *bnop = bno;
+       return 0;
+}
+
+/*
+ * Log the given fields from the agf structure.
+ * The offsets[] table below holds the byte offset of each xfs_agf_t
+ * field that can be logged, ending with sizeof(xfs_agf_t).
+ * xfs_btree_offsets() converts the fields mask into a first/last byte
+ * range using that table, and that range of bp is then logged in the
+ * transaction.
+ * NOTE(review): the table order presumably has to match the bit order of
+ * the XFS_AGF_... flags (XFS_AGF_NUM_BITS entries) -- confirm against the
+ * header that defines them before reordering anything here.
+ */
+void
+xfs_alloc_log_agf(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
+       int             fields) /* mask of fields to be logged (XFS_AGF_...) */
+{
+       int     first;          /* first byte offset */
+       int     last;           /* last byte offset */
+       static const short      offsets[] = {
+               offsetof(xfs_agf_t, agf_magicnum),
+               offsetof(xfs_agf_t, agf_versionnum),
+               offsetof(xfs_agf_t, agf_seqno),
+               offsetof(xfs_agf_t, agf_length),
+               offsetof(xfs_agf_t, agf_roots[0]),
+               offsetof(xfs_agf_t, agf_levels[0]),
+               offsetof(xfs_agf_t, agf_flfirst),
+               offsetof(xfs_agf_t, agf_fllast),
+               offsetof(xfs_agf_t, agf_flcount),
+               offsetof(xfs_agf_t, agf_freeblks),
+               offsetof(xfs_agf_t, agf_longest),
+               sizeof(xfs_agf_t)
+       };
+
+       xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
+       xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
+}
+
+/*
+ * Interface for inode allocation to force the pag data to be initialized.
+ * The AGF for agno is read through xfs_alloc_read_agf(); if that call
+ * hands back a buffer it is released again straight away, since only the
+ * side effects of the read are wanted here.  Returns the read error, or 0.
+ */
+int                                    /* error */
+xfs_alloc_pagf_init(
+       xfs_mount_t             *mp,    /* file system mount structure */
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       int                     flags)  /* XFS_ALLOC_FLAGS_... */
+{
+       xfs_buf_t               *agbp;  /* AGF buffer, dropped before return */
+       int                     rval;   /* result of the AGF read */
+
+       rval = xfs_alloc_read_agf(mp, tp, agno, flags, &agbp);
+       if (rval != 0)
+               return rval;
+       /*
+        * The read may succeed without returning a buffer.
+        */
+       if (agbp != NULL)
+               xfs_trans_brelse(tp, agbp);
+       return 0;
+}
+
+/*
+ * Put the block on the freelist for the allocation group.
+ * agflbp may be passed as NULL, in which case the AGFL buffer is read
+ * here.  agf_fllast is advanced by one (wrapping to 0 at XFS_AGFL_SIZE)
+ * and bno is stored in the AGFL slot it then indexes.  The AGF flcount,
+ * the transaction's agflist delta and the in-core pagf_flcount each go
+ * up by one.  Both the changed AGF fields and the single AGFL slot that
+ * was written are logged.  Returns 0, or the error from reading the AGFL.
+ */
+int                                    /* error */
+xfs_alloc_put_freelist(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
+       xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
+       xfs_agblock_t           bno)    /* block being freed */
+{
+       xfs_agf_t               *agf;   /* a.g. freespace structure */
+       xfs_agfl_t              *agfl;  /* a.g. free block array */
+       xfs_agblock_t           *blockp;/* pointer to array entry */
+       int                     error;
+#ifdef XFS_ALLOC_TRACE
+       static char             fname[] = "xfs_alloc_put_freelist";
+#endif
+       xfs_mount_t             *mp;    /* mount structure */
+       xfs_perag_t             *pag;   /* per allocation group data */
+
+       agf = XFS_BUF_TO_AGF(agbp);
+       mp = tp->t_mountp;
+
+       if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
+                       INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
+               return error;
+       agfl = XFS_BUF_TO_AGFL(agflbp);
+        INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1);
+        if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE)
+               INT_ZERO(agf->agf_fllast, ARCH_CONVERT);        /* wrap to slot 0 */
+       pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
+       INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1);
+       xfs_trans_agflist_delta(tp, 1);
+       pag->pagf_flcount++;
+       ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE);
+       blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)];
+       INT_SET(*blockp, ARCH_CONVERT, bno);
+       TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+       xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
+       xfs_trans_log_buf(tp, agflbp,   /* byte range of *blockp within agfl */
+               (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
+               (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
+                       sizeof(xfs_agblock_t) - 1));
+       return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ * On success *bpp is the AGF buffer, or NULL if the read handed back no
+ * buffer (0 is still returned in that case).  If the AGF fails the
+ * consistency checks below, or XFS_TEST_ERROR fires for
+ * XFS_ERRTAG_ALLOC_READ_AGF, the buffer is released and EFSCORRUPTED is
+ * returned without *bpp being set.  When pagf_init is not yet set for
+ * the a.g., the in-core per-ag counters are filled in from the AGF and
+ * pagf_init is set.
+ */
+int                                    /* error */
+xfs_alloc_read_agf(
+       xfs_mount_t     *mp,            /* mount point structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       int             flags,          /* XFS_ALLOC_FLAG_... */
+       xfs_buf_t       **bpp)          /* buffer for the ag freelist header */
+{
+       xfs_agf_t       *agf;           /* ag freelist header */
+       int             agf_ok;         /* set if agf is consistent */
+       xfs_buf_t       *bp;            /* return value */
+       xfs_daddr_t     d;              /* disk block address */
+       int             error;
+       xfs_perag_t     *pag;           /* per allocation group data */
+
+       ASSERT(agno != NULLAGNUMBER);
+       d = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR);
+       if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 1,
+                       (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
+                       &bp))
+               return error;
+       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+       if (!bp) {
+               *bpp = NULL;
+               return 0;
+       }
+       /*
+        * Validate the agf block: magic number, version number,
+        * freeblks no larger than the a.g. length, and the freelist
+        * first/last indices and count within XFS_AGFL_SIZE.
+        */
+       agf = XFS_BUF_TO_AGF(bp);
+       agf_ok =
+               INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC &&
+               XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT)) &&
+               INT_GET(agf->agf_freeblks, ARCH_CONVERT) <=
+                               INT_GET(agf->agf_length, ARCH_CONVERT) &&
+               INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE &&
+               INT_GET(agf->agf_fllast,  ARCH_CONVERT) < XFS_AGFL_SIZE &&
+               INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE;
+       if (XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
+                       XFS_RANDOM_ALLOC_READ_AGF)) {
+               xfs_trans_brelse(tp, bp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       pag = &mp->m_perag[agno];
+       if (!pag->pagf_init) {
+               pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT);
+               pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT);
+               pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
+               pag->pagf_levels[XFS_BTNUM_BNOi] =
+                       INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT);
+               pag->pagf_levels[XFS_BTNUM_CNTi] =
+                       INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT);
+               pag->pagf_init = 1;
+       }
+#ifdef DEBUG
+       else if (!XFS_FORCED_SHUTDOWN(mp)) {    /* in-core copy must match the AGF */
+               ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT));
+               ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT));
+               ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT));
+               ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
+                      INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT));
+               ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
+                      INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT));
+       }
+#endif
+       XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Allocate an extent (variable-size).
+ * Depending on the allocation type, we either look in a single allocation
+ * group or loop over the allocation groups to find the result.
+ */
+int                            /* error */
+xfs_alloc_vextent(
+       xfs_alloc_arg_t *args)  /* allocation argument structure */
+{
+       xfs_agblock_t   agsize; /* allocation group size */
+       int             error;
+       int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
+#ifdef XFS_ALLOC_TRACE
+       static char     fname[] = "xfs_alloc_vextent";
+#endif
+       xfs_extlen_t    minleft;/* minimum left value, temp copy */
+       xfs_mount_t     *mp;    /* mount structure pointer */
+       xfs_agnumber_t  sagno;  /* starting allocation group number */
+       xfs_alloctype_t type;   /* input allocation type */
+
+       mp = args->mp;
+       type = args->otype = args->type;
+       args->agbno = NULLAGBLOCK;
+       /*
+        * Just fix this up, for the case where the last a.g. is shorter
+        * (or there's only one a.g.) and the caller couldn't easily figure
+        * that out (xfs_bmap_alloc).
+        */
+       agsize = mp->m_sb.sb_agblocks;
+       if (args->maxlen > agsize)
+               args->maxlen = agsize;
+       if (args->alignment == 0)
+               args->alignment = 1;
+       ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
+       ASSERT(args->minlen <= args->maxlen);
+       ASSERT(args->minlen <= agsize);
+       ASSERT(args->mod < args->prod);
+       if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
+           XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
+           args->minlen > args->maxlen || args->minlen > agsize ||
+           args->mod >= args->prod) {
+               args->fsbno = NULLFSBLOCK;
+               TRACE_ALLOC("badargs", args);
+               return 0;
+       }
+       switch (type) {
+       case XFS_ALLOCTYPE_THIS_AG:
+       case XFS_ALLOCTYPE_NEAR_BNO:
+       case XFS_ALLOCTYPE_THIS_BNO:
+               /*
+                * These three force us into a single a.g.
+                */
+               args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+               mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+               args->pag = &mp->m_perag[args->agno];
+               minleft = args->minleft;
+               args->minleft = 0;
+               error = xfs_alloc_fix_freelist(args, 0);
+               args->minleft = minleft;
+               if (error) {
+                       TRACE_ALLOC("nofix", args);
+                       goto error0;
+               }
+               if (!args->agbp) {
+                       mrunlock(&mp->m_peraglock);
+                       TRACE_ALLOC("noagbp", args);
+                       break;
+               }
+               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+               if (error = xfs_alloc_ag_vextent(args))
+                       goto error0;
+               mrunlock(&mp->m_peraglock);
+               break;
+       case XFS_ALLOCTYPE_START_BNO:
+               /*
+                * Try near allocation first, then anywhere-in-ag after
+                * the first a.g. fails.
+                */
+               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+               args->type = XFS_ALLOCTYPE_NEAR_BNO;
+               /* FALLTHROUGH */
+       case XFS_ALLOCTYPE_ANY_AG:
+       case XFS_ALLOCTYPE_START_AG:
+       case XFS_ALLOCTYPE_FIRST_AG:
+               /*
+                * Rotate through the allocation groups looking for a winner.
+                */
+               if (type == XFS_ALLOCTYPE_ANY_AG) {
+                       /*
+                        * Start with the last place we left off.
+                        */
+                       args->agno = sagno = mp->m_agfrotor;
+                       args->type = XFS_ALLOCTYPE_THIS_AG;
+                       flags = XFS_ALLOC_FLAG_TRYLOCK;
+               } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
+                       /*
+                        * Start with allocation group given by bno.
+                        */
+                       args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+                       args->type = XFS_ALLOCTYPE_THIS_AG;
+                       sagno = 0;
+                       flags = 0;
+               } else {
+                       if (type == XFS_ALLOCTYPE_START_AG)
+                               args->type = XFS_ALLOCTYPE_THIS_AG;
+                       /*
+                        * Start with the given allocation group.
+                        */
+                       args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+                       flags = XFS_ALLOC_FLAG_TRYLOCK;
+               }
+               /*
+                * Loop over allocation groups twice; first time with
+                * trylock set, second time without.
+                */
+               for (;;) {
+                       mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+                       args->pag = &mp->m_perag[args->agno];
+                       if (error = xfs_alloc_fix_freelist(args, flags)) {
+                               TRACE_ALLOC("nofix", args);
+                               goto error0;
+                       }
+                       /*
+                        * If we get a buffer back then the allocation will fly.
+                        */
+                       if (args->agbp) {
+                               if (error = xfs_alloc_ag_vextent(args))
+                                       goto error0;
+                               mrunlock(&mp->m_peraglock);
+                               break;
+                       }
+                       mrunlock(&mp->m_peraglock);
+                       TRACE_ALLOC("loopfailed", args);
+                       /*
+                        * Didn't work, figure out the next iteration.
+                        */
+                       if (args->agno == sagno &&
+                           type == XFS_ALLOCTYPE_START_BNO)
+                               args->type = XFS_ALLOCTYPE_THIS_AG;
+                       if (++(args->agno) == mp->m_sb.sb_agcount)
+                               args->agno = 0;
+                       /* 
+                        * Reached the starting a.g., must either be done
+                        * or switch to non-trylock mode.
+                        */
+                       if (args->agno == sagno) {
+                               if (flags == 0) {
+                                       args->agbno = NULLAGBLOCK;
+                                       TRACE_ALLOC("allfailed", args);
+                                       break;
+                               }
+                               flags = 0;
+                               if (type == XFS_ALLOCTYPE_START_BNO) {
+                                       args->agbno = XFS_FSB_TO_AGBNO(mp,
+                                               args->fsbno);
+                                       args->type = XFS_ALLOCTYPE_NEAR_BNO;
+                               }
+                       }
+               }
+               mp->m_agfrotor = (args->agno + 1) % mp->m_sb.sb_agcount;
+               break;
+       default:
+               ASSERT(0);
+               /* NOTREACHED */
+       }
+       if (args->agbno == NULLAGBLOCK)
+               args->fsbno = NULLFSBLOCK;
+       else {
+               args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+#ifdef DEBUG
+               ASSERT(args->len >= args->minlen);
+               ASSERT(args->len <= args->maxlen);
+               ASSERT(args->agbno % args->alignment == 0);
+               XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
+                       args->len);
+#endif
+       }
+       return 0;
+error0:
+       mrunlock(&mp->m_peraglock);
+       return error;
+}
+
+/*
+ * Free an extent.
+ * Just break up the extent address and hand off to xfs_free_ag_extent
+ * after fixing up the freelist.
+ */
+int                            /* error */
+xfs_free_extent(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_fsblock_t   bno,    /* starting block number of extent */
+       xfs_extlen_t    len)    /* length of extent */
+{
+#ifdef DEBUG
+       xfs_agf_t       *agf;   /* a.g. freespace header */
+#endif
+       xfs_alloc_arg_t args;   /* allocation argument structure */
+       int             error;
+
+       ASSERT(len != 0);
+       args.tp = tp;
+       args.mp = tp->t_mountp;
+       args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
+       ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+       args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+       args.alignment = 1;
+       args.minlen = args.minleft = args.minalignslop = 0;
+       mrlock(&args.mp->m_peraglock, MR_ACCESS, PINOD);
+       args.pag = &args.mp->m_perag[args.agno];
+       if (error = xfs_alloc_fix_freelist(&args, 0))
+               goto error0;
+#ifdef DEBUG
+       ASSERT(args.agbp != NULL);
+       agf = XFS_BUF_TO_AGF(args.agbp);
+       ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT));
+#endif
+       error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
+               len, 0);
+error0:
+       mrunlock(&args.mp->m_peraglock);
+       return error;
+}
diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c
new file mode 100644 (file)
index 0000000..c6d0e0f
--- /dev/null
@@ -0,0 +1,2136 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Free space allocation for XFS.
+ */
+
+#include <xfs.h>
+
+/*
+ * Single level of the xfs_alloc_delete record deletion routine.
+ * Delete record pointed to by cur/level.
+ * Remove the record from its block then rebalance the tree.
+ *
+ * The function's return value is an error code (0 when no error occurred).
+ * The outcome of the deletion is reported through *stat:
+ *   0 - failure: the cursor index at this level is 0 or lies past the
+ *       last entry of the block, so there is nothing to delete;
+ *   1 - done: no further work is needed at higher levels;
+ *   2 - go on: this block was joined with a sibling and freed, so the
+ *       next level up has something to do.
+ */
+STATIC int                             /* error */
+xfs_alloc_delrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level removing record from */
+       int                     *stat)  /* fail/done/go-on */
+{
+       xfs_agf_t               *agf;   /* a.g. freespace header */
+       xfs_alloc_block_t       *block; /* btree block record/key lives in */
+       xfs_agblock_t           bno;    /* btree block number */
+       xfs_buf_t               *bp;    /* buffer for block */
+       int                     error;  /* error return value */
+       int                     i;      /* loop index, also helper result */
+       xfs_alloc_key_t         key;    /* lkp points here if block is level 0 */
+       xfs_agblock_t           lbno;   /* left block's block number */
+       xfs_buf_t               *lbp;   /* left block's buffer pointer */
+       xfs_alloc_block_t       *left;  /* left btree block */
+       xfs_alloc_key_t         *lkp;   /* left block key pointer */
+       xfs_alloc_ptr_t         *lpp;   /* left block address pointer */
+       int                     lrecs;  /* number of records in left block */
+       xfs_alloc_rec_t         *lrp;   /* left block record pointer */
+       xfs_mount_t             *mp;    /* mount structure */
+       int                     ptr;    /* index in btree block for this rec */
+       xfs_agblock_t           rbno;   /* right block's block number */
+       xfs_buf_t               *rbp;   /* right block's buffer pointer */
+       xfs_alloc_block_t       *right; /* right btree block */
+       xfs_alloc_key_t         *rkp;   /* right block key pointer */
+       xfs_alloc_ptr_t         *rpp;   /* right block address pointer */
+       int                     rrecs;  /* number of records in right block */
+       xfs_alloc_rec_t         *rrp;   /* right block record pointer */
+       xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
+
+       /*
+        * Get the index of the entry being deleted, check for nothing there.
+        */
+       ptr = cur->bc_ptrs[level];
+       if (ptr == 0) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Get the buffer & block containing the record or key/ptr.
+        */
+       bp = cur->bc_bufs[level];
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level, bp))
+               return error;
+#endif
+       /*
+        * Fail if we're off the end of the block.
+        */
+       if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               *stat = 0;
+               return 0;
+       }
+       XFS_STATS_INC(xs_abt_delrec);
+       /*
+        * It's a nonleaf.  Excise the key and ptr being deleted, by
+        * sliding the entries past them down one.
+        * Log the changed areas of the block.
+        * lkp and lpp address the first key/pointer of the block and are
+        * indexed from 0, while ptr is 1-based, hence the "ptr - 1".
+        */
+       if (level > 0) {
+               lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+               lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+               for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+                       ovbcopy(&lkp[ptr], &lkp[ptr - 1],
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
+                       ovbcopy(&lpp[ptr], &lpp[ptr - 1],
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
+                       xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+                       xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+               }
+       }
+       /*
+        * It's a leaf.  Excise the record being deleted, by sliding the
+        * entries past it down one.  Log the changed areas of the block.
+        */
+       else {
+               lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+                       ovbcopy(&lrp[ptr], &lrp[ptr - 1],
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
+                       xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+               }
+               /*
+                * If it's the first record in the block, we'll need a key
+                * structure to pass up to the next level (updkey).
+                * lrp still addresses slot 1 of the block, so the key is
+                * built from whatever record occupies that slot after the
+                * slide above (if any was needed).
+                */
+               if (ptr == 1) {
+                       key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */
+                       key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */
+                       lkp = &key;
+               }
+       }
+       /*
+        * Decrement and log the number of entries in the block.
+        */
+       INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+       /*
+        * See if the longest free extent in the allocation group was
+        * changed by this operation.  True if it's the by-size btree, and
+        * this is the leaf level, and there is no right sibling block,
+        * and this was the last record.
+        * Note bb_numrecs has already been decremented, so "ptr > numrecs"
+        * below means the deleted entry was the last one in the block.
+        */
+       agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+       mp = cur->bc_mp;
+
+       if (level == 0 &&
+           cur->bc_btnum == XFS_BTNUM_CNT &&
+           INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+           ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1);
+               /*
+                * There are still records in the block.  Grab the size
+                * from the last one.
+                */
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+                       rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur);
+                       INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT);
+               }
+               /*
+                * No free extents left.
+                */
+               else
+                       INT_ZERO(agf->agf_longest, ARCH_CONVERT);
+               mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest =
+                       INT_GET(agf->agf_longest, ARCH_CONVERT);
+               xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+                       XFS_AGF_LONGEST);
+       }
+       /*
+        * Is this the root level?  If so, we're almost done.
+        */
+       if (level == cur->bc_nlevels - 1) {
+               /*
+                * If this is the root level,
+                * and there's only one entry left,
+                * and it's NOT the leaf level,
+                * then we can get rid of this level.
+                */
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) {
+                       /*
+                        * lpp is still set to the first pointer in the block.
+                        * Make it the new root of the btree.
+                        * bno remembers the old root block so that it can
+                        * be put on the freelist below.
+                        */
+                       bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
+                       INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT);
+                       INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1);
+                       mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--;
+                       /*
+                        * Put this buffer/block on the ag's freelist.
+                        */
+                       if (error = xfs_alloc_put_freelist(cur->bc_tp,
+                                       cur->bc_private.a.agbp, NULL, bno))
+                               return error;
+                       xfs_trans_agbtree_delta(cur->bc_tp, -1);
+                       xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+                               XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+                       /*
+                        * Update the cursor so there's one fewer level.
+                        */
+                       xfs_btree_setbuf(cur, level, 0);
+                       cur->bc_nlevels--;
+               } else if (level > 0 &&
+                          (error = xfs_alloc_decrement(cur, level, &i)))
+                       return error;
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * If we deleted the leftmost entry in the block, update the
+        * key values above us in the tree.
+        */
+       if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
+               return error;
+       /*
+        * If the number of records remaining in the block is at least
+        * the minimum, we're done.
+        */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+               if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
+                       return error;
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * Otherwise, we have to move some records around to keep the
+        * tree balanced.  Look at the left and right sibling blocks to
+        * see if we can re-balance by moving only one record.
+        */
+       rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+       lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+       bno = NULLAGBLOCK;
+       ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
+       /*
+        * Duplicate the cursor so our btree manipulations here won't
+        * disrupt the next level up.
+        * From here until tcur is deleted, failures must leave through
+        * error0 so that the temporary cursor is released.
+        */
+       if (error = xfs_btree_dup_cursor(cur, &tcur))
+               return error;
+       /*
+        * If there's a right sibling, see if it's ok to shift an entry
+        * out of it.
+        */
+       if (rbno != NULLAGBLOCK) {
+               /*
+                * Move the temp cursor to the last entry in the next block.
+                * Actually any entry but the first would suffice.
+                */
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_increment(tcur, level, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Grab a pointer to the block.
+                */
+               rbp = tcur->bc_bufs[level];
+               right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+                       goto error0;
+#endif
+               /*
+                * Grab the current block number, for future use.
+                * (The right sibling's left-sibling link is our own block.)
+                */
+               bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+               /*
+                * If right block is full enough so that removing one entry
+                * won't make it too empty, and left-shifting an entry out
+                * of right to us works, we're done.
+                */
+               if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+                    XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+                       if (error = xfs_alloc_lshift(tcur, level, &i))
+                               goto error0;
+                       if (i) {
+                               ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+                                      XFS_ALLOC_BLOCK_MINRECS(level, cur));
+                               xfs_btree_del_cursor(tcur,
+                                                    XFS_BTREE_NOERROR);
+                               if (level > 0 &&
+                                   (error = xfs_alloc_decrement(cur, level,
+                                           &i)))
+                                       return error;
+                               *stat = 1;
+                               return 0;
+                       }
+               }
+               /*
+                * Otherwise, grab the number of records in right for
+                * future reference, and fix up the temp cursor to point
+                * to our block again (last record).
+                */
+               rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+               if (lbno != NULLAGBLOCK) {
+                       i = xfs_btree_firstrec(tcur, level);
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (error = xfs_alloc_decrement(tcur, level, &i))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               }
+       }
+       /*
+        * If there's a left sibling, see if it's ok to shift an entry
+        * out of it.
+        */
+       if (lbno != NULLAGBLOCK) {
+               /*
+                * Move the temp cursor to the first entry in the
+                * previous block.
+                */
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_alloc_decrement(tcur, level, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               xfs_btree_firstrec(tcur, level);        /* result not checked here */
+               /*
+                * Grab a pointer to the block.
+                */
+               lbp = tcur->bc_bufs[level];
+               left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+                       goto error0;
+#endif
+               /*
+                * Grab the current block number, for future use.
+                * (The left sibling's right-sibling link is our own block.)
+                */
+               bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+               /*
+                * If left block is full enough so that removing one entry
+                * won't make it too empty, and right-shifting an entry out
+                * of left to us works, we're done.
+                */
+               if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+                    XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+                       if (error = xfs_alloc_rshift(tcur, level, &i))
+                               goto error0;
+                       if (i) {
+                               ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+                                      XFS_ALLOC_BLOCK_MINRECS(level, cur));
+                               xfs_btree_del_cursor(tcur,
+                                                    XFS_BTREE_NOERROR);
+                               if (level == 0)
+                                       cur->bc_ptrs[0]++;
+                               *stat = 1;
+                               return 0;
+                       }
+               }
+               /*
+                * Otherwise, grab the number of records in left for
+                * future reference.
+                */
+               lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * Delete the temp cursor, we're done with it.
+        * Error paths below therefore return directly instead of going
+        * through error0.
+        */
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       /*
+        * If here, we need to do a join to keep the tree balanced.
+        */
+       ASSERT(bno != NULLAGBLOCK);
+       /*
+        * See if we can join with the left neighbor block.
+        * lrecs is only read when lbno is valid, which is exactly when it
+        * was assigned above.
+        */
+       if (lbno != NULLAGBLOCK &&
+           lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+               /*
+                * Set "right" to be the starting block,
+                * "left" to be the left neighbor.
+                */
+               rbno = bno;
+               right = block;
+               rbp = bp;
+               if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                               cur->bc_private.a.agno, lbno, 0, &lbp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+               if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+                       return error;
+       }
+       /*
+        * If that won't work, see if we can join with the right neighbor block.
+        * As with lrecs, rrecs is only read when rbno is valid.
+        */
+       else if (rbno != NULLAGBLOCK &&
+                rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+                 XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+               /*
+                * Set "left" to be the starting block,
+                * "right" to be the right neighbor.
+                */
+               lbno = bno;
+               left = block;
+               lbp = bp;
+               if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                               cur->bc_private.a.agno, rbno, 0, &rbp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+               if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+                       return error;
+       }
+       /*
+        * Otherwise, we can't fix the imbalance.
+        * Just return.  This is probably a logic error, but it's not fatal.
+        */
+       else {
+               if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
+                       return error;
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * We're now going to join "left" and "right" by moving all the stuff
+        * in "right" to "left" and deleting "right".
+        */
+       if (level > 0) {
+               /*
+                * It's a non-leaf.  Move keys and pointers.
+                * The destination is the first free slot after left's
+                * existing entries.
+                */
+               lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+               rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
+               bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
+               xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                                  INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                                  INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       } else {
+               /*
+                * It's a leaf.  Move records.
+                */
+               lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+               bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+               xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                                  INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       }
+       /*
+        * If we joined with the left neighbor, set the buffer in the
+        * cursor to the left block, and fix up the index.
+        * left->bb_numrecs has not been updated yet, so it is still the
+        * count of entries that now precede ours in the merged block.
+        */
+       if (bp != lbp) {
+               xfs_btree_setbuf(cur, level, lbp);
+               cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * If we joined with the right neighbor and there's a level above
+        * us, increment the cursor at that level.
+        */
+       else if (level + 1 < cur->bc_nlevels &&
+                (error = xfs_alloc_increment(cur, level + 1, &i)))
+               return error;
+       /*
+        * Fix up the number of records in the surviving block.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       /*
+        * Fix up the right block pointer in the surviving block, and log it.
+        */
+       left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+       xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+       /*
+        * If there is a right sibling now, make it point to the
+        * remaining block.
+        */
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+               xfs_alloc_block_t       *rrblock;
+               xfs_buf_t                       *rrbp;
+
+               if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                               cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+                               &rrbp, XFS_ALLOC_BTREE_REF))
+                       return error;
+               rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
+               if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+                       return error;
+               INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+               xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
+       }
+       /*
+        * Free the deleting block by putting it on the freelist.
+        * rbno is the block number of "right", the block emptied by the
+        * join in either case.
+        */
+       if (error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                       NULL, rbno))
+               return error;
+       xfs_trans_agbtree_delta(cur->bc_tp, -1);
+       /*
+        * Adjust the current level's cursor so that we're left referring
+        * to the right node, after we're done.
+        * If this leaves the ptr value 0 our caller will fix it up.
+        */
+       if (level > 0)
+               cur->bc_ptrs[level]--;
+       /*
+        * Return value means the next level up has something to do.
+        */
+       *stat = 2;
+       return 0;
+
+error0:
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);    /* release temp cursor on failure */
+       return error;
+}
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int                             /* error, or 0 if none */
+xfs_alloc_insrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to insert record at */
+       xfs_agblock_t           *bnop,  /* i/o: in, block ptr stored at nonleaf levels; out, block from a split or NULLAGBLOCK */
+       xfs_alloc_rec_t         *recp,  /* i/o: in, record to insert; out, record built from the split's key (only if a split happened) */
+       xfs_btree_cur_t         **curp, /* output: new cursor replacing cur (written only if a split happened) */
+       int                     *stat)  /* output: 1 if inserted, 0 if not; at root level, xfs_alloc_newroot's status */
+{
+       xfs_agf_t               *agf;   /* allocation group freelist header */
+       xfs_alloc_block_t       *block; /* btree block record/key lives in */
+       xfs_buf_t               *bp;    /* buffer for block */
+       int                     error;  /* error return value */
+       int                     i;      /* loop index, also status from helpers */
+       xfs_alloc_key_t         key;    /* key value being inserted */
+       xfs_alloc_key_t         *kp;    /* pointer to btree keys */
+       xfs_agblock_t           nbno;   /* block number of allocated block */
+       xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
+       xfs_alloc_key_t         nkey;   /* new key value, from split */
+       xfs_alloc_rec_t         nrec;   /* new record value, for caller */
+       int                     optr;   /* old ptr value */
+       xfs_alloc_ptr_t         *pp;    /* pointer to btree addresses */
+       int                     ptr;    /* index in btree block for this rec */
+       xfs_alloc_rec_t         *rp;    /* pointer to btree records */
+
+       ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0);
+       /*
+        * If we made it to the root level, allocate a new root block
+        * and we're done.  No new block is handed back to the caller,
+        * and *stat is whatever xfs_alloc_newroot reported.
+        */
+       if (level >= cur->bc_nlevels) {
+               XFS_STATS_INC(xs_abt_insrec);
+               if (error = xfs_alloc_newroot(cur, &i))
+                       return error;
+               *bnop = NULLAGBLOCK;
+               *stat = i;
+               return 0;
+       }
+       /*
+        * Make a key out of the record data to be inserted, and save it.
+        * ptr and optr are the cursor's entry index at this level; the
+        * entry arrays below are addressed from entry 1, so index ptr
+        * lives at array slot ptr - 1.
+        */
+       key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */
+       key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */
+       optr = ptr = cur->bc_ptrs[level];
+       /*
+        * If we're off the left edge, return failure.
+        */
+       if (ptr == 0) {
+               *stat = 0;
+               return 0;
+       }
+       XFS_STATS_INC(xs_abt_insrec);
+       /*
+        * Get pointers to the btree buffer and block.
+        */
+       bp = cur->bc_bufs[level];
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level, bp))
+               return error;
+       /*
+        * Check that the new entry is being inserted in the right place.
+        * Only possible when an existing entry already sits at ptr.
+        */
+       if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               if (level == 0) {
+                       rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+                       xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+               } else {
+                       kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
+                       xfs_btree_check_key(cur->bc_btnum, &key, kp);
+               }
+       }
+#endif
+       nbno = NULLAGBLOCK;     /* stays NULLAGBLOCK unless a split happens */
+       ncur = (xfs_btree_cur_t *)0;
+       /*
+        * If the block is full, we can't insert the new entry until we
+        * make the block un-full.  Three attempts are made in order:
+        * shift right, shift left, split.
+        */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+               /*
+                * First, try shifting an entry to the right neighbor.
+                */
+               if (error = xfs_alloc_rshift(cur, level, &i))
+                       return error;
+               if (i) {
+                       /* nothing */
+               }
+               /*
+                * Next, try shifting an entry to the left neighbor.
+                * On success the cursor index has changed, so both ptr
+                * and optr are re-read from it.
+                */
+               else {
+                       if (error = xfs_alloc_lshift(cur, level, &i))
+                               return error;
+                       if (i)
+                               optr = ptr = cur->bc_ptrs[level];
+                       else {
+                               /*
+                                * Next, try splitting the current block in
+                                * half. If this works we have to re-set our
+                                * variables because we could be in a
+                                * different block now.
+                                * NOTE(review): only ptr is re-read below;
+                                * optr keeps its pre-split value, unlike
+                                * the lshift case above -- confirm that
+                                * this is intended.
+                                */
+                               if (error = xfs_alloc_split(cur, level, &nbno,
+                                               &nkey, &ncur, &i))
+                                       return error;
+                               if (i) {
+                                       bp = cur->bc_bufs[level];
+                                       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+                                       if (error =
+                                               xfs_btree_check_sblock(cur,
+                                                       block, level, bp))
+                                               return error;
+#endif
+                                       ptr = cur->bc_ptrs[level];
+                                       nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */
+                                       nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */
+                               }
+                               /*
+                                * Otherwise the insert fails.
+                                */
+                               else {
+                                       *stat = 0;
+                                       return 0;
+                               }
+                       }
+               }
+       }
+       /*
+        * At this point we know there's room for our new entry in the block
+        * we're pointing at.
+        */
+       if (level > 0) {
+               /*
+                * It's a non-leaf entry.  Make a hole for the new data
+                * in the key and ptr regions of the block: entries from
+                * index ptr through numrecs each move up one slot.
+                */
+               kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+               pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               ovbcopy(&kp[ptr - 1], &kp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
+               ovbcopy(&pp[ptr - 1], &pp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, *bnop, level))
+                       return error;
+#endif
+               /*
+                * Now stuff the new data in, bump numrecs and log the new data.
+                * The key comes from the caller's record and the pointer
+                * from *bnop; everything from ptr to the new numrecs is
+                * logged because all of those slots changed.
+                */
+               kp[ptr - 1] = key;
+               INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+               xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+#ifdef DEBUG
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+                       xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
+                               kp + ptr);
+#endif
+       } else {
+               /*
+                * It's a leaf entry.  Make a hole for the new record:
+                * records from index ptr through numrecs move up one slot.
+                */
+               rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+               ovbcopy(&rp[ptr - 1], &rp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+               /*
+                * Now stuff the new record in, bump numrecs
+                * and log the new data.
+                */
+               rp[ptr - 1] = *recp; /* INT_: struct copy */
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+#ifdef DEBUG
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
+                       xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
+                               rp + ptr);
+#endif
+       }
+       /*
+        * Log the new number of records in the btree header.
+        */
+       xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+       /*
+        * If we inserted at the start of a block, update the parents' keys.
+        * The test uses optr, the index saved before any split re-read.
+        */
+       if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
+               return error;
+       /*
+        * Look to see if the longest extent in the allocation group
+        * needs to be updated.
+        */
+
+       agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+       if (level == 0 &&
+           cur->bc_btnum == XFS_BTNUM_CNT &&
+           INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+           INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) {
+               /*
+                * If this is a leaf in the by-size btree and there
+                * is no right sibling block and this block is bigger
+                * than the previous longest block, update it.
+                * Both the on-disk AGF field and the in-core per-AG
+                * copy are updated, and the AGF change is logged.
+                */
+               INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT);
+               cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest
+                       = INT_GET(recp->ar_blockcount, ARCH_CONVERT);
+               xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+                       XFS_AGF_LONGEST);
+       }
+       /*
+        * Return the new block number, if any.
+        * If there is one, give back a record value and a cursor too.
+        * Without a split, *recp and *curp are left untouched.
+        */
+       *bnop = nbno;
+       if (nbno != NULLAGBLOCK) {
+               *recp = nrec; /* INT_: struct copy */
+               *curp = ncur; /* pointer copy, not a struct copy */
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Log header fields from a btree block.
+ * The fields mask and the offsets table are handed to xfs_btree_offsets,
+ * which fills in first and last; that byte range of the buffer is then
+ * logged in transaction tp.
+ */
+STATIC void
+xfs_alloc_log_block(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     fields) /* mask of fields: XFS_BB_... */
+{
+       int                     first;  /* first byte offset logged */
+       int                     last;   /* last byte offset logged */
+       static const short      offsets[] = {   /* table of offsets: one per header field, then the header size */
+               offsetof(xfs_alloc_block_t, bb_magic),
+               offsetof(xfs_alloc_block_t, bb_level),
+               offsetof(xfs_alloc_block_t, bb_numrecs),
+               offsetof(xfs_alloc_block_t, bb_leftsib),
+               offsetof(xfs_alloc_block_t, bb_rightsib),
+               sizeof(xfs_alloc_block_t)
+       };
+
+       xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
+       xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Log keys from a btree block (nonleaf).
+ * kfirst and klast are entry indices counted from 1, and both ends are
+ * included: the logged byte range runs from the first byte of key kfirst
+ * to the last byte of key klast, measured from the start of the block.
+ */
+STATIC void
+xfs_alloc_log_keys(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t                       *bp,    /* buffer containing btree block */
+       int                     kfirst, /* index of first key to log */
+       int                     klast)  /* index of last key to log */
+{
+       xfs_alloc_block_t       *block; /* btree block to log from */
+       int                     first;  /* first byte offset logged */
+       xfs_alloc_key_t         *kp;    /* key pointer in btree block */
+       int                     last;   /* last byte offset logged */
+
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+       kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+       first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
+       last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); /* one byte before the key after klast */
+       xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log block pointer fields from a btree block (nonleaf).
+ * pfirst and plast are entry indices counted from 1, and both ends are
+ * included: the logged byte range runs from the first byte of pointer
+ * pfirst to the last byte of pointer plast, measured from the start of
+ * the block.
+ */
+STATIC void
+xfs_alloc_log_ptrs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t                       *bp,    /* buffer containing btree block */
+       int                     pfirst, /* index of first pointer to log */
+       int                     plast)  /* index of last pointer to log */
+{
+       xfs_alloc_block_t       *block; /* btree block to log from */
+       int                     first;  /* first byte offset logged */
+       int                     last;   /* last byte offset logged */
+       xfs_alloc_ptr_t         *pp;    /* block-pointer pointer in btree blk */
+
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+       pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
+       first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
+       last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); /* one byte before the pointer after plast */
+       xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log records from a btree block (leaf).
+ * rfirst and rlast are entry indices counted from 1, and both ends are
+ * included: the logged byte range runs from the first byte of record
+ * rfirst to the last byte of record rlast, measured from the start of
+ * the block.  In DEBUG builds every record in that range is first
+ * asserted to end at or before the AGF's agf_length.
+ */
+STATIC void
+xfs_alloc_log_recs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     rfirst, /* index of first record to log */
+       int                     rlast)  /* index of last record to log */
+{
+       xfs_alloc_block_t       *block; /* btree block to log from */
+       int                     first;  /* first byte offset logged */
+       int                     last;   /* last byte offset logged */
+       xfs_alloc_rec_t         *rp;    /* record pointer for btree block */
+
+
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+       rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
+#ifdef DEBUG
+       {
+               xfs_agf_t       *agf;   /* a.g. freespace header */
+               xfs_alloc_rec_t *p;     /* record being checked */
+
+               agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+               for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
+                       ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <=
+                              INT_GET(agf->agf_length, ARCH_CONVERT));
+       }
+#endif
+       first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
+       last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); /* one byte before the record after rlast */
+       xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Lookup the record held in cur->bc_rec.a.  The cursor is made to point
+ * to it, based on dir.
+ * *stat is set to 0 if no such record can be found, 1 for success.
+ * The function's own return value is an error code, 0 when there was
+ * no error.
+ */
+STATIC int                             /* error */
+xfs_alloc_lookup(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_lookup_t            dir,    /* <=, ==, or >= */
+       int                     *stat)  /* success/failure */
+{
+       xfs_agblock_t           agbno;  /* a.g. relative btree block number */
+       xfs_agnumber_t          agno;   /* allocation group number */
+       xfs_alloc_block_t       *block; /* current btree block */
+       int                     diff;   /* difference for the current key */
+       int                     error;  /* error return value */
+       int                     keyno;  /* current key number */
+       int                     level;  /* level in the btree */
+       xfs_mount_t             *mp;    /* file system mount point */
+
+       XFS_STATS_INC(xs_abt_lookup);
+       /*
+        * Get the allocation group header, and the root block number.
+        */
+       mp = cur->bc_mp;
+
+       {
+               xfs_agf_t       *agf;   /* a.g. freespace header */
+
+               agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+               agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+               agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
+       }
+       /*
+        * Iterate over each level in the btree, starting at the root.
+        * For each level above the leaves, find the key we need, based
+        * on the lookup record, then follow the corresponding block
+        * pointer down to the next level.
+        * diff starts out nonzero so that the root block is always
+        * searched.
+        */
+       for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+               xfs_buf_t       *bp;            /* buffer pointer for btree block */
+               xfs_daddr_t     d;              /* disk address of btree block */
+
+               /*
+                * Get the disk address we're looking for.
+                */
+               d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+               /*
+                * If the old buffer at this level is for a different block,
+                * throw it away, otherwise just use it.
+                */
+               bp = cur->bc_bufs[level];
+               if (bp && XFS_BUF_ADDR(bp) != d)
+                       bp = (xfs_buf_t *)0;
+               if (!bp) {
+                       /*
+                        * Need to get a new buffer.  Read it, then
+                        * set it in the cursor, releasing the old one.
+                        */
+                       if (error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
+                                       agbno, 0, &bp, XFS_ALLOC_BTREE_REF))
+                               return error;
+                       xfs_btree_setbuf(cur, level, bp);
+                       /*
+                        * Point to the btree block, now that we have the buffer.
+                        * Only a freshly read block is checked here.
+                        */
+                       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+                       if (error = xfs_btree_check_sblock(cur, block, level,
+                                       bp))
+                               return error;
+               } else
+                       block = XFS_BUF_TO_ALLOC_BLOCK(bp); /* cursor's buffer already holds this block */
+               /*
+                * If we already had a key match at a higher level, we know
+                * we need to use the first entry in this block.
+                */
+               if (diff == 0)
+                       keyno = 1;
+               /*
+                * Otherwise we need to search this block.  Do a binary search.
+                */
+               else {
+                       int             high;   /* high entry number */
+                       xfs_alloc_key_t *kkbase;/* base of keys in block */
+                       xfs_alloc_rec_t *krbase;/* base of records in block */
+                       int             low;    /* low entry number */
+
+                       /*
+                        * Get a pointer to keys or records.
+                        * Only the one matching this level is set and used.
+                        */
+                       if (level > 0)
+                               kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
+                       else
+                               krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
+                       /*
+                        * Set low and high entry numbers, 1-based.
+                        */
+                       low = 1;
+                       if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+                               /*
+                                * If the block is empty, the tree must
+                                * be an empty leaf.
+                                * The cursor is left at entry 0 for a <=
+                                * lookup and at entry 1 otherwise, and the
+                                * lookup reports failure.
+                                */
+                               ASSERT(level == 0 && cur->bc_nlevels == 1);
+                               cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+                               *stat = 0;
+                               return 0;
+                       }
+                       /*
+                        * Binary search the block.
+                        */
+                       while (low <= high) {
+                               xfs_extlen_t    blockcount;     /* key value */
+                               xfs_agblock_t   startblock;     /* key value */
+
+                               XFS_STATS_INC(xs_abt_compare);
+                               /*
+                                * keyno is average of low and high.
+                                */
+                               keyno = (low + high) >> 1;
+                               /*
+                                * Get startblock & blockcount.
+                                */
+                               if (level > 0) {
+                                       xfs_alloc_key_t *kkp;
+
+                                       kkp = kkbase + keyno - 1;
+                                       startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT);
+                                       blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT);
+                               } else {
+                                       xfs_alloc_rec_t *krp;
+
+                                       krp = krbase + keyno - 1;
+                                       startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT);
+                                       blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT);
+                               }
+                               /*
+                                * Compute difference to get next direction.
+                                * diff is the block entry minus the cursor's
+                                * record: by startblock alone for
+                                * XFS_BTNUM_BNO, otherwise by blockcount
+                                * with startblock breaking ties.
+                                */
+                               if (cur->bc_btnum == XFS_BTNUM_BNO)
+                                       diff = (int)startblock -
+                                              (int)cur->bc_rec.a.ar_startblock;
+                               else if (!(diff = (int)blockcount -
+                                           (int)cur->bc_rec.a.ar_blockcount))
+                                       diff = (int)startblock -
+                                           (int)cur->bc_rec.a.ar_startblock;
+                               /*
+                                * Less than, move right.
+                                */
+                               if (diff < 0)
+                                       low = keyno + 1;
+                               /*
+                                * Greater than, move left.
+                                */
+                               else if (diff > 0)
+                                       high = keyno - 1;
+                               /*
+                                * Equal, we're done.
+                                */
+                               else
+                                       break;
+                       }
+               }
+               /*
+                * If there are more levels, set up for the next level
+                * by getting the block number and filling in the cursor.
+                */
+               if (level > 0) {
+                       /*
+                        * If we moved left, need the previous key number,
+                        * unless there isn't one.
+                        */
+                       if (diff > 0 && --keyno < 1)
+                               keyno = 1;
+                       agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+#ifdef DEBUG
+                       if (error = xfs_btree_check_sptr(cur, agbno, level))
+                               return error;
+#endif
+                       cur->bc_ptrs[level] = keyno;
+               }
+       }
+       /*
+        * Done with the search.
+        * See if we need to adjust the results.
+        * Here keyno and diff describe the last leaf entry compared:
+        * diff < 0 means that entry sorts before the target, so any
+        * lookup other than <= steps one entry to the right.
+        */
+       if (dir != XFS_LOOKUP_LE && diff < 0) {
+               keyno++;
+               /*
+                * If ge search and we went off the end of the block, but it's
+                * not the last block, we're in the wrong block.
+                * xfs_alloc_increment is asked to move the cursor on from
+                * there; anything but i == 1 is treated as corruption.
+                */
+               if (dir == XFS_LOOKUP_GE &&
+                   keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+                   INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       int     i;
+
+                       cur->bc_ptrs[0] = keyno;
+                       if (error = xfs_alloc_increment(cur, 0, &i))
+                               return error;
+                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       *stat = 1;
+                       return 0;
+               }
+       }
+       else if (dir == XFS_LOOKUP_LE && diff > 0)
+               keyno--;
+       cur->bc_ptrs[0] = keyno;
+       /*
+        * Return if we succeeded or not.
+        * An index outside 1..numrecs is a failure; otherwise an ==
+        * lookup succeeds only on an exact match, and the other lookups
+        * always succeed.
+        */
+       if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+               *stat = 0;
+       else
+               *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+       return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ * *stat is set to 1 if an entry was moved and to 0 if the shift was not
+ * attempted (no left sibling, cursor at the first entry, or left block
+ * full).  The return value is an error code, 0 when there was no error.
+ */
+STATIC int                             /* error */
+xfs_alloc_lshift(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to shift record on */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+#ifdef DEBUG
+       int                     i;      /* loop index */
+#endif
+       xfs_alloc_key_t         key;    /* key value for leaf level upward */
+       xfs_buf_t                       *lbp;   /* buffer for left neighbor block */
+       xfs_alloc_block_t       *left;  /* left neighbor btree block */
+       int                     nrec;   /* new number of left block entries */
+       xfs_buf_t                       *rbp;   /* buffer for right (current) block */
+       xfs_alloc_block_t       *right; /* right (current) btree block */
+       xfs_alloc_key_t         *rkp;   /* key pointer for right block */
+       xfs_alloc_ptr_t         *rpp;   /* address pointer for right block */
+       xfs_alloc_rec_t         *rrp;   /* record pointer for right block */
+
+       /*
+        * Set up variables for this block as "right".
+        */
+       rbp = cur->bc_bufs[level];
+       right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+               return error;
+#endif
+       /*
+        * If we've got no left sibling then we can't shift an entry left.
+        */
+       if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * If the cursor entry is the one that would be moved, don't
+        * do it... it's too complicated.
+        */
+       if (cur->bc_ptrs[level] <= 1) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Set up the left neighbor as "left".
+        */
+       if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+                       XFS_ALLOC_BTREE_REF))
+               return error;
+       left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+       /*
+        * If it's full, it can't take another entry.
+        * NOTE(review): lbp is not released on this return or on the
+        * error return above; presumably the transaction keeps track of
+        * the buffer -- confirm.
+        */
+       if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+               *stat = 0;
+               return 0;
+       }
+       nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+       /*
+        * If non-leaf, copy a key and a ptr to the left block.
+        * The first entry of right goes into slot nrec of left, one
+        * past left's current last entry.  rkp and rpp are kept for the
+        * slide further down.
+        * NOTE(review): the xfs_btree_check_key call here is not under
+        * #ifdef DEBUG, unlike in xfs_alloc_insrec; presumably it
+        * compiles away in non-DEBUG builds -- confirm.
+        */
+       if (level > 0) {
+               xfs_alloc_key_t *lkp;   /* key pointer for left block */
+               xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
+
+               lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
+               rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+               *lkp = *rkp;
+               xfs_alloc_log_keys(cur, lbp, nrec, nrec);
+               lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
+               rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+                       return error;
+#endif
+               *lpp = *rpp; /* INT_: copy */
+               xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
+               xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
+       }
+       /*
+        * If leaf, copy a record to the left block.
+        * rrp is kept for the slide further down.
+        */
+       else {
+               xfs_alloc_rec_t *lrp;   /* record pointer for left block */
+
+               lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
+               rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+               *lrp = *rrp;
+               xfs_alloc_log_recs(cur, lbp, nrec, nrec);
+               xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
+       }
+       /*
+        * Bump and log left's numrecs, decrement and log right's numrecs.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+       xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+       /*
+        * Slide the contents of right down one entry.
+        * right's numrecs has already been decremented, so it is the
+        * count of entries that remain and get moved.
+        */
+       if (level > 0) {
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+                                       level))
+                               return error;
+               }
+#endif
+               ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+               xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       } else {
+               ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+               key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+               rkp = &key;     /* leaf: key is built from right's new first record */
+       }
+       /*
+        * Update the parent key values of right.
+        * rkp is right's new first key: in the block itself for a
+        * non-leaf, or the local copy built above for a leaf.
+        */
+       if (error = xfs_alloc_updkey(cur, rkp, level + 1))
+               return error;
+       /*
+        * Slide the cursor value left one.
+        */
+       cur->bc_ptrs[level]--;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ */
+STATIC int                             /* error */
+xfs_alloc_newroot(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+       xfs_agblock_t           lbno;   /* left block number */
+       xfs_buf_t                       *lbp;   /* left btree buffer */
+       xfs_alloc_block_t       *left;  /* left btree block */
+       xfs_mount_t             *mp;    /* mount structure */
+       xfs_agblock_t           nbno;   /* new block number */
+       xfs_buf_t                       *nbp;   /* new (root) buffer */
+       xfs_alloc_block_t       *new;   /* new (root) btree block */
+       int                     nptr;   /* new value for key index, 1 or 2 */
+       xfs_agblock_t           rbno;   /* right block number */
+       xfs_buf_t                       *rbp;   /* right btree buffer */
+       xfs_alloc_block_t       *right; /* right btree block */
+
+       mp = cur->bc_mp;
+
+       ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
+       /*
+        * Get a buffer from the freelist blocks, for the new root.
+        */
+       if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                       &nbno))
+               return error;
+       /*
+        * None available, we fail.
+        */
+       if (nbno == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       xfs_trans_agbtree_delta(cur->bc_tp, 1);
+       nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
+               0);
+       new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
+       /*
+        * Set the root data in the a.g. freespace structure.
+        */
+       {
+               xfs_agf_t       *agf;   /* a.g. freespace header */
+               xfs_agnumber_t  seqno;
+
+               agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+               INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno);
+               INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1);
+               seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+               mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
+               xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+                       XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+       }
+       /*
+        * At the previous root level there are now two blocks: the old
+        * root, and the new block generated when it was split.
+        * We don't know which one the cursor is pointing at, so we
+        * set up variables "left" and "right" for each case.
+        */
+       lbp = cur->bc_bufs[cur->bc_nlevels - 1];
+       left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))
+               return error;
+#endif
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+               /*
+                * Our block is left, pick up the right block.
+                */
+               lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
+               rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                               cur->bc_private.a.agno, rbno, 0, &rbp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+               if (error = xfs_btree_check_sblock(cur, right,
+                               cur->bc_nlevels - 1, rbp))
+                       return error;
+               nptr = 1;
+       } else {
+               /*
+                * Our block is right, pick up the left block.
+                */
+               rbp = lbp;
+               right = left;
+               rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
+               lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                               cur->bc_private.a.agno, lbno, 0, &lbp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+               if (error = xfs_btree_check_sblock(cur, left,
+                               cur->bc_nlevels - 1, lbp))
+                       return error;
+               nptr = 2;
+       }
+       /*
+        * Fill in the new block's btree header and log it.
+        */
+       INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+       INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+       INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+       INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+        INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+       xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
+       ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+       /*
+        * Fill in the key data in the new root.
+        */
+       {
+               xfs_alloc_key_t         *kp;    /* btree key pointer */
+
+               kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
+               if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+                       kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
+                       kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
+               } else {
+                       xfs_alloc_rec_t *rp;    /* btree record pointer */
+
+                       rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
+                       kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+                       kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+                       rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+                       kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+                       kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+               }
+       }
+       xfs_alloc_log_keys(cur, nbp, 1, 2);
+       /*
+        * Fill in the pointer data in the new root.
+        */
+       {
+               xfs_alloc_ptr_t         *pp;    /* btree address pointer */
+
+               pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
+               INT_SET(pp[0], ARCH_CONVERT, lbno);
+               INT_SET(pp[1], ARCH_CONVERT, rbno);
+       }
+       xfs_alloc_log_ptrs(cur, nbp, 1, 2);
+       /*
+        * Fix up the cursor.
+        */
+       xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+       cur->bc_ptrs[cur->bc_nlevels] = nptr;
+       cur->bc_nlevels++;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Move the last entry of the cur/level block into its right sibling if possible.
+ * Sets *stat to 1 if an entry was moved, 0 if not; returns 0 or an error.
+ */
+STATIC int                             /* error */
+xfs_alloc_rshift(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to shift record on */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+       int                     i;      /* loop index */
+       xfs_alloc_key_t         key;    /* key value for leaf level upward */
+       xfs_buf_t                       *lbp;   /* buffer for left (current) block */
+       xfs_alloc_block_t       *left;  /* left (current) btree block */
+       xfs_buf_t                       *rbp;   /* buffer for right neighbor block */
+       xfs_alloc_block_t       *right; /* right neighbor btree block */
+       xfs_alloc_key_t         *rkp;   /* key pointer for right block */
+       xfs_btree_cur_t         *tcur;  /* temporary cursor */
+
+       /*
+        * Set up variables for this block as "left".
+        */
+       lbp = cur->bc_bufs[level];
+       left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+#endif
+       /*
+        * If we've got no right sibling then we can't shift an entry right.
+        */
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * If the cursor entry is the last one (the one that would be moved)
+        * or is past it, don't do it... it's too complicated.
+        */
+       if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Set up the right neighbor as "right".
+        */
+       if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+                       XFS_ALLOC_BTREE_REF))
+               return error;
+       right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+       if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+               return error;
+       /*
+        * If it's full, it can't take another entry.
+        */
+       if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Make a hole at the start of the right neighbor block, then
+        * copy the last left block entry to the hole.
+        */
+       if (level > 0) {
+               xfs_alloc_key_t *lkp;   /* key pointer for left block */
+               xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
+               xfs_alloc_ptr_t *rpp;   /* address pointer for right block */
+
+               lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+               rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+                       return error;
+#endif
+               *rkp = *lkp; /* INT_: copy */
+               *rpp = *lpp; /* INT_: copy */
+               xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+       } else {
+               xfs_alloc_rec_t *lrp;   /* record pointer for left block */
+               xfs_alloc_rec_t *rrp;   /* record pointer for right block */
+
+               lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+               ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               *rrp = *lrp;
+               xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+               key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+               rkp = &key;     /* new first key of right, passed to xfs_alloc_updkey below */
+               xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+       }
+       /*
+        * Decrement and log left's numrecs, bump and log right's numrecs.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+       xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+       /*
+        * Using a temporary cursor, update the parent key values of the
+        * block on the right.  The caller's cursor is left as it was.
+        */
+       if (error = xfs_btree_dup_cursor(cur, &tcur))
+               return error;
+       i = xfs_btree_lastrec(tcur, level);
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       if ((error = xfs_alloc_increment(tcur, level, &i)) ||
+           (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
+               goto error0;
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       *stat = 1;
+       return 0;
+error0:
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Split cur/level block in half; *stat is 0 if no freelist block was available.
+ * Return new block number and its first key (to be inserted into parent).
+ */
+STATIC int                             /* error */
+xfs_alloc_split(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to split */
+       xfs_agblock_t           *bnop,  /* output: block number allocated */
+       xfs_alloc_key_t         *keyp,  /* output: first key of new block */
+       xfs_btree_cur_t         **curp, /* output: new cursor */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+       int                     i;      /* loop index/first left entry to move */
+       xfs_agblock_t           lbno;   /* left (current) block number */
+       xfs_buf_t                       *lbp;   /* buffer for left block */
+       xfs_alloc_block_t       *left;  /* left (current) btree block */
+       xfs_agblock_t           rbno;   /* right (new) block number */
+       xfs_buf_t                       *rbp;   /* buffer for right block */
+       xfs_alloc_block_t       *right; /* right (new) btree block */
+
+       /*
+        * Allocate the new block from the freelist.
+        * If we can't do it, we're toast.  Give up.
+        */
+       if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                       &rbno))
+               return error;
+       if (rbno == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       xfs_trans_agbtree_delta(cur->bc_tp, 1);
+       rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
+               rbno, 0);
+       /*
+        * Set up the new block as "right".
+        */
+       right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+       /*
+        * "Left" is the current (according to the cursor) block.
+        */
+       lbp = cur->bc_bufs[level];
+       left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+#endif
+       /*
+        * Fill in the btree header for the new block.
+        */
+       INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+       right->bb_level = left->bb_level; /* INT_: direct copy */
+       INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+       /*
+        * If there's an odd number of entries and the cursor will stay in left,
+        * right gets the extra one so the blocks balance after an insert in left.
+        */
+       if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+           cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+               INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+       i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+       /*
+        * For non-leaf blocks, copy keys and addresses over to the new block.
+        */
+       if (level > 0) {
+               xfs_alloc_key_t *lkp;   /* left btree key pointer */
+               xfs_alloc_ptr_t *lpp;   /* left btree address pointer */
+               xfs_alloc_key_t *rkp;   /* right btree key pointer */
+               xfs_alloc_ptr_t *rpp;   /* right btree address pointer */
+
+               lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
+               lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
+               rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+               rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
+               bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));/* INT_: copy */
+               xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               *keyp = *rkp;
+       }
+       /*
+        * For leaf blocks, copy records over to the new block.
+        */
+       else {
+               xfs_alloc_rec_t *lrp;   /* left btree record pointer */
+               xfs_alloc_rec_t *rrp;   /* right btree record pointer */
+
+               lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
+               rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+               bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+               keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+       }
+       /*
+        * Find the left block number by looking in the buffer.
+        * Adjust numrecs, sibling pointers.
+        */
+       lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+       right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+       INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno);
+       INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+       xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
+       xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+       /*
+        * If there's a block to the new block's right, make that block
+        * point back to right instead of to left.
+        */
+       if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+               xfs_alloc_block_t       *rrblock;       /* rr btree block */
+               xfs_buf_t                       *rrbp;          /* buffer for rrblock */
+
+               if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                               cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0,
+                               &rrbp, XFS_ALLOC_BTREE_REF))
+                       return error;
+               rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
+               if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+                       return error;
+               INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno);
+               xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
+       }
+       /*
+        * If the cursor is really in the right block, move it there.
+        * If it's just pointing past the last entry in left, then we'll
+        * insert there, so don't change anything in that case.
+        */
+       if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+               xfs_btree_setbuf(cur, level, rbp);
+               cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * If there are more levels, we'll need another cursor which refers to
+        * the right block, no matter where this cursor was.
+        */
+       if (level + 1 < cur->bc_nlevels) {
+               if (error = xfs_btree_dup_cursor(cur, curp))
+                       return error;
+               (*curp)->bc_ptrs[level + 1]++;
+       }
+       *bnop = rbno;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Update keys from "level" toward the root while the cursor is at entry 1.
+ */
+STATIC int                             /* error */
+xfs_alloc_updkey(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_alloc_key_t         *keyp,  /* new key value to update to */
+       int                     level)  /* starting level for update */
+{
+       int                     ptr;    /* index of key in block */
+
+       /*
+        * Go up the tree from this level toward the root.
+        * At each level, update the key value to the value input.
+        * Stop after updating a level where the cursor isn't pointing
+        * at the first entry in the block, or when we run out of levels.
+        */
+       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+               xfs_alloc_block_t       *block; /* btree block */
+               xfs_buf_t                       *bp;    /* buffer for block */
+#ifdef DEBUG
+               int                     error;  /* error return value */
+#endif
+               xfs_alloc_key_t         *kp;    /* ptr to btree block keys */
+
+               bp = cur->bc_bufs[level];
+               block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, block, level, bp))
+                       return error;
+#endif
+               ptr = cur->bc_ptrs[level];
+               kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
+               *kp = *keyp;
+               xfs_alloc_log_keys(cur, bp, ptr, ptr);
+       }
+       return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level; *stat is 0 at the left edge.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_alloc_decrement(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat)  /* success/failure */
+{
+       xfs_alloc_block_t       *block; /* btree block */
+       int                     error;  /* error return value */
+       int                     lev;    /* btree level */
+
+       ASSERT(level < cur->bc_nlevels);
+       /*
+        * Read-ahead to the left at this level.
+        */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+       /*
+        * Decrement the ptr at this level.  If we're still in the block
+        * then we're done.
+        */
+       if (--cur->bc_ptrs[level] > 0) {
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * Get a pointer to the btree block.
+        */
+       block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level,
+                       cur->bc_bufs[level]))
+               return error;
+#endif
+       /*
+        * If we just went off the left edge of the tree, return failure.
+        */
+       if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * March up the tree decrementing pointers.
+        * Stop when we don't go off the left edge of a block.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               if (--cur->bc_ptrs[lev] > 0)
+                       break;
+               /*
+                * Read-ahead the left block, we're going to read it
+                * in the next loop.
+                */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+       }
+       /*
+        * If we went off the root then we are seriously confused.
+        */
+       ASSERT(lev < cur->bc_nlevels);
+       /*
+        * Now walk back down the tree to "level", pointing the cursor at
+        * the last entry of each block we read on the way.
+        */
+       for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+               xfs_agblock_t   agbno;  /* block number of btree block */
+               xfs_buf_t               *bp;    /* buffer pointer for block */
+
+               agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                               cur->bc_private.a.agno, agbno, 0, &bp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+               cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int                                    /* error */
+xfs_alloc_delete(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* error return value */
+       int             i;              /* result code from delrec/decrement */
+       int             level;          /* btree level */
+
+       /*
+        * Go up the tree, starting at leaf level.
+        * If 2 is returned then a join was done; go to the next level.
+        * Otherwise we are done.
+        */
+       for (level = 0, i = 2; i == 2; level++) {
+               if (error = xfs_alloc_delrec(cur, level, &i))
+                       return error;
+       }
+       if (i == 0) {   /* find the first upper level whose ptr is 0, step it left */
+               for (level = 1; level < cur->bc_nlevels; level++) {
+                       if (cur->bc_ptrs[level] == 0) {
+                               if (error = xfs_alloc_decrement(cur, level, &i))
+                                       return error;
+                               break;
+                       }
+               }
+       }
+       *stat = i;
+       return 0;
+}
+
+/*
+ * Get the data from the leaf record the cursor points to; *stat is 0 if none.
+ */
+int                                    /* error */
+xfs_alloc_get_rec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_agblock_t           *bno,   /* output: starting block of extent */
+       xfs_extlen_t            *len,   /* output: length of extent */
+       int                     *stat)  /* output: success/failure */
+{
+       xfs_alloc_block_t       *block; /* btree block */
+#ifdef DEBUG
+       int                     error;  /* error return value */
+#endif
+       int                     ptr;    /* record number */
+
+       ptr = cur->bc_ptrs[0];
+       block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))
+               return error;
+#endif
+       /*
+        * Off the right end or left end, return failure (bno/len untouched).
+        */
+       if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Point to the record and extract its data.
+        */
+       {
+               xfs_alloc_rec_t         *rec;   /* record data */
+
+               rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+               *bno = INT_GET(rec->ar_startblock, ARCH_CONVERT);
+               *len = INT_GET(rec->ar_blockcount, ARCH_CONVERT);
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Increment cursor by one record at the level; *stat is 0 at the right edge.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                    /* error */
+xfs_alloc_increment(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat)  /* success/failure */
+{
+       xfs_alloc_block_t       *block; /* btree block */
+       xfs_buf_t                       *bp;    /* tree block buffer */
+       int                     error;  /* error return value */
+       int                     lev;    /* btree level */
+
+       ASSERT(level < cur->bc_nlevels);
+       /*
+        * Read-ahead to the right at this level.
+        */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+       /*
+        * Get a pointer to the btree block.
+        */
+       bp = cur->bc_bufs[level];
+       block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level, bp))
+               return error;
+#endif
+       /*
+        * Increment the ptr at this level.  If we're still in the block
+        * then we're done.
+        */
+       if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * If we just went off the right edge of the tree, return failure.
+        */
+       if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * March up the tree incrementing pointers.
+        * Stop when we don't go off the right edge of a block.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               bp = cur->bc_bufs[lev];
+               block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+#endif
+               if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+                       break;
+               /*
+                * Read-ahead the right block, we're going to read it
+                * in the next loop.
+                */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+       }
+       /*
+        * If we went off the root then we are seriously confused.
+        */
+       ASSERT(lev < cur->bc_nlevels);
+       /*
+        * Now walk back down the tree to "level", pointing the cursor at
+        * the first entry of each block we read on the way.
+        */
+       for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+            lev > level; ) {
+               xfs_agblock_t   agbno;  /* block number of btree block */
+
+               agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                               cur->bc_private.a.agno, agbno, 0, &bp,
+                               XFS_ALLOC_BTREE_REF))
+                       return error;
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+               cur->bc_ptrs[lev] = 1;
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Insert the record held in cur->bc_rec.a at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int                                    /* error */
+xfs_alloc_insert(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* error return value */
+       int             i;              /* result value, 0 for failure */
+       int             level;          /* current level number in btree */
+       xfs_agblock_t   nbno;           /* new block number (split result) */
+       xfs_btree_cur_t *ncur;          /* new cursor (split result) */
+       xfs_alloc_rec_t nrec;           /* record being inserted this level */
+       xfs_btree_cur_t *pcur;          /* previous level's cursor */
+
+       level = 0;
+       nbno = NULLAGBLOCK;
+       INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
+       INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
+       ncur = (xfs_btree_cur_t *)0;
+       pcur = cur;
+       /*
+        * Loop going up the tree, starting at the leaf level.
+        * Stop when we don't get a split block, that must mean that
+        * the insert is finished with this level.
+        */
+       do {
+               /*
+                * Insert nrec/nbno into this level of the tree.
+                * Note if we fail, nbno will be null.
+                */
+               if (error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
+                               &i)) {
+                       if (pcur != cur)
+                               xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               /*
+                * See if the cursor we just used is trash.
+                * Can't trash the caller's cursor, but otherwise we should
+                * if ncur is a new cursor or we're about to be done.
+                * Its level count is copied back to the caller's cursor first.
+                */
+               if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
+                       cur->bc_nlevels = pcur->bc_nlevels;
+                       xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+               }
+               /*
+                * If we got a new cursor, switch to it.
+                */
+               if (ncur) {
+                       pcur = ncur;
+                       ncur = (xfs_btree_cur_t *)0;
+               }
+       } while (nbno != NULLAGBLOCK);
+       *stat = i;
+       return 0;
+}
+
+/*
+ * Set cur's record to [bno, len] and look up the record equal to it.
+ */
+int                                    /* error */
+xfs_alloc_lookup_eq(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agblock_t   bno,            /* starting block of extent */
+       xfs_extlen_t    len,            /* length of extent */
+       int             *stat)          /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Set cur's record to [bno, len] and look up the first record greater
+ * than or equal to it in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_ge(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agblock_t   bno,            /* starting block of extent */
+       xfs_extlen_t    len,            /* length of extent */
+       int             *stat)          /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Set cur's record to [bno, len] and look up the first record less
+ * than or equal to it in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_le(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agblock_t   bno,            /* starting block of extent */
+       xfs_extlen_t    len,            /* length of extent */
+       int             *stat)          /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int                                    /* error */
+xfs_alloc_update(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len)    /* length of extent */
+{
+       xfs_alloc_block_t       *block; /* btree block to update */
+       int                     error;  /* error return value */
+       int                     ptr;    /* current record number (updating) */
+
+       ASSERT(len > 0);
+       /*
+        * Pick up the current leaf block (level 0 of the cursor).
+        */
+       block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))
+               return error;
+#endif
+       /*
+        * Get the address of the rec to be updated.
+        */
+       ptr = cur->bc_ptrs[0];
+       {
+               xfs_alloc_rec_t         *rp;    /* pointer to updated record */
+
+               rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+               /*
+                * Fill in the new contents and log them.
+                */
+               INT_SET(rp->ar_startblock, ARCH_CONVERT, bno);
+               INT_SET(rp->ar_blockcount, ARCH_CONVERT, len);
+               xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
+       }
+       /*
+        * If it's the by-size btree and it's the last leaf block and
+        * it's the last record... then update the size of the longest
+        * extent in the a.g., in both the per-a.g. data and the a.g. header.
+        */
+       if (cur->bc_btnum == XFS_BTNUM_CNT &&
+           INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
+           ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               xfs_agf_t       *agf;   /* a.g. freespace header */
+               xfs_agnumber_t  seqno;
+
+               agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+               seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+               cur->bc_mp->m_perag[seqno].pagf_longest = len;
+               INT_SET(agf->agf_longest, ARCH_CONVERT, len);
+               xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+                       XFS_AGF_LONGEST);
+       }
+       /*
+        * Updating first record in leaf. Pass new key value up to our parent.
+        */
+       if (ptr == 1) {
+               xfs_alloc_key_t key;    /* key containing [bno, len] */
+
+               INT_SET(key.ar_startblock, ARCH_CONVERT, bno);
+               INT_SET(key.ar_blockcount, ARCH_CONVERT, len);
+               if (error = xfs_alloc_updkey(cur, &key, 1))
+                       return error;
+       }
+       return 0;
+}
diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c
new file mode 100644 (file)
index 0000000..f3b02e0
--- /dev/null
@@ -0,0 +1,1169 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_attr_leaf.c
+ *
+ * Routines to implement leaf blocks of attributes as Btrees of hashed names.
+ */
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Create the initial contents of a leaf attribute list
+ * or a leaf in a node attribute list.
+ *
+ * Gets a buffer for attribute-fork block "blkno" of args->dp through
+ * xfs_da_get_buf(), zeroes XFS_LBSIZE() bytes of it, and fills in a
+ * fresh header: the leaf magic number, "firstused" set to the block
+ * size, and freemap[0] spanning everything between the end of the
+ * header and "firstused".  Bytes 0 .. XFS_LBSIZE()-1 of the buffer are
+ * then logged in args->trans.
+ *
+ * If "firstused" reads back as 0 after being set to the block size it
+ * is replaced by (block size - XFS_ATTR_LEAF_NAME_ALIGN).
+ * NOTE(review): presumably this covers a block size too large for the
+ * width of the firstused field; xfs_attr_leaf_compact() labels the same
+ * test "handle truncation gracefully" -- confirm against the header
+ * layout.
+ *
+ * Returns 0 with the new buffer stored in *bpp, or the error from
+ * xfs_da_get_buf(), in which case *bpp is not written.
+ */
+int
+xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+{
+       xfs_attr_leafblock_t *leaf;
+       xfs_attr_leaf_hdr_t *hdr;
+       xfs_inode_t *dp;
+       xfs_dabuf_t *bp;
+       int error;
+
+       dp = args->dp;
+       ASSERT(dp != NULL);
+       error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
+                                           XFS_ATTR_FORK);
+       if (error)
+               return(error);
+       ASSERT(bp != NULL);
+       leaf = bp->data;
+       bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+       hdr = &leaf->hdr;
+       INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC);
+       INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
+       if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0) {
+               INT_SET(hdr->firstused, ARCH_CONVERT,
+                       XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN);
+       }
+
+       INT_SET(hdr->freemap[0].base, ARCH_CONVERT,
+                                               sizeof(xfs_attr_leaf_hdr_t));
+       INT_SET(hdr->freemap[0].size, ARCH_CONVERT,
+                                         INT_GET(hdr->firstused, ARCH_CONVERT)
+                                       - INT_GET(hdr->freemap[0].base,
+                                                               ARCH_CONVERT));
+
+       xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+       *bpp = bp;
+       return(0);
+}
+
+/*
+ * Split the leaf node, rebalance, then add the new entry.
+ *
+ * Steps, in order:
+ *   1. xfs_da_grow_inode() obtains a new block number;
+ *   2. xfs_attr_leaf_create() initializes it as an empty leaf in newblk;
+ *   3. xfs_attr_leaf_rebalance() spreads oldblk's entries over both
+ *      blocks and sets state->inleaf;
+ *   4. xfs_da_blk_link() links newblk in next to oldblk;
+ *   5. xfs_attr_leaf_add() inserts the entry described by state->args
+ *      into oldblk if state->inleaf is set, otherwise into newblk;
+ *   6. both blocks' "hashval" fields are refreshed.
+ *
+ * An error from steps 1, 2 or 4 is returned immediately.  An error from
+ * step 5 is returned only after step 6 has run.  Returns 0 on success.
+ */
+int
+xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+                                  xfs_da_state_blk_t *newblk)
+{
+       xfs_dablk_t blkno;
+       int error;
+
+       /*
+        * Allocate space for a new leaf node.
+        */
+       ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
+       error = xfs_da_grow_inode(state->args, &blkno);
+       if (error)
+               return(error);
+       error = xfs_attr_leaf_create(state->args, blkno, &newblk->bp);
+       if (error)
+               return(error);
+       newblk->blkno = blkno;
+       newblk->magic = XFS_ATTR_LEAF_MAGIC;
+
+       /*
+        * Rebalance the entries across the two leaves.
+        * NOTE: rebalance() currently depends on the 2nd block being empty.
+        */
+       xfs_attr_leaf_rebalance(state, oldblk, newblk);
+       error = xfs_da_blk_link(state, oldblk, newblk);
+       if (error)
+               return(error);
+
+       /*
+        * Save info on "old" attribute for "atomic rename" ops, leaf_add()
+        * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the
+        * "new" attrs info.  Will need the "old" info to remove it later.
+        *
+        * Insert the "new" entry in the correct block.
+        */
+       if (state->inleaf)
+               error = xfs_attr_leaf_add(oldblk->bp, state->args);
+       else
+               error = xfs_attr_leaf_add(newblk->bp, state->args);
+
+       /*
+        * Update last hashval in each block since we added the name.
+        * This is done even when the add above failed; "error" is only
+        * handed back afterwards.
+        */
+       oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+       return(error);
+}
+
+/*
+ * Add a name to the leaf attribute list structure.
+ *
+ * Scans the header's freemap from the last slot down to slot 0 for the
+ * first region that can hold the new entry and, on a fit, hands the
+ * insertion to xfs_attr_leaf_add_work() with that slot's index.
+ *
+ * If no slot fits, the block is compacted with xfs_attr_leaf_compact()
+ * and freemap[0] is tried -- unless the block has no holes and the free
+ * bytes added up during the scan ("sum") are fewer than the entry needs,
+ * in which case compaction is skipped.
+ *
+ * Returns whatever xfs_attr_leaf_add_work() returns, or
+ * XFS_ERROR(ENOSPC) when the entry cannot be made to fit.
+ */
+int
+xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
+{
+       xfs_attr_leafblock_t *leaf;
+       xfs_attr_leaf_hdr_t *hdr;
+       xfs_attr_leaf_map_t *map;
+       int tablesize, entsize, sum, tmp, i;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       ASSERT((args->index >= 0)
+               && (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+       hdr = &leaf->hdr;
+       entsize = xfs_attr_leaf_newentsize(args,
+                          args->trans->t_mountp->m_sb.sb_blocksize, NULL);
+
+       /*
+        * Search through freemap for first-fit on new name length.
+        * (may need to figure in size of entry struct too)
+        *
+        * "tablesize" is the header plus the entry table with one extra
+        * slot.  If that would already extend past "firstused", no map
+        * is tried at all; the map sizes are only accumulated in "sum".
+        * Otherwise a map whose base lies below "firstused" must hold an
+        * entry-table slot in addition to the entsize bytes.
+        */
+       tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1)
+                                       * sizeof(xfs_attr_leaf_entry_t)
+                                       + sizeof(xfs_attr_leaf_hdr_t);
+       map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1];
+       for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
+               if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
+                       sum += INT_GET(map->size, ARCH_CONVERT);
+                       continue;
+               }
+               if (INT_GET(map->size, ARCH_CONVERT) == 0)
+                       continue;       /* no space in this map */
+               tmp = entsize;
+               if (INT_GET(map->base, ARCH_CONVERT)
+                               < INT_GET(hdr->firstused, ARCH_CONVERT))
+                       tmp += sizeof(xfs_attr_leaf_entry_t);
+               if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
+                       tmp = xfs_attr_leaf_add_work(bp, args, i);
+                       return(tmp);
+               }
+               sum += INT_GET(map->size, ARCH_CONVERT);
+       }
+
+       /*
+        * If there are no holes in the address space of the block,
+        * and we don't have enough freespace, then compaction will do us
+        * no good and we should just give up.
+        */
+       if (!hdr->holes && (sum < entsize))
+               return(XFS_ERROR(ENOSPC));
+
+       /*
+        * Compact the entries to coalesce free space.
+        * This may change the hdr->count via dropping INCOMPLETE entries.
+        */
+       xfs_attr_leaf_compact(args->trans, bp);
+
+       /*
+        * After compaction, the block is guaranteed to have only one
+        * free region, in freemap[0].  If it is not big enough, give up.
+        */
+       if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT)
+                               < (entsize + sizeof(xfs_attr_leaf_entry_t)))
+               return(XFS_ERROR(ENOSPC));
+
+       return(xfs_attr_leaf_add_work(bp, args, 0));
+}
+
+/*
+ * Add a name to a leaf attribute list structure.
+ *
+ * Does the actual insertion for xfs_attr_leaf_add(): opens a slot at
+ * args->index in the entry table, carves the name/value bytes off the
+ * end of freemap[mapindex], fills in the entry and the name structure,
+ * and updates and logs the header.  The chosen free region is only
+ * ASSERTed to be large enough, so the caller must have checked it.
+ *
+ * Values flagged local by xfs_attr_leaf_newentsize() are copied into the
+ * block together with the name.  Otherwise only the name is stored, the
+ * entry is marked XFS_ATTR_INCOMPLETE, valuelen/valueblk are zeroed, and
+ * args->rmtblkno / args->rmtblkcnt are filled in.
+ *
+ * When args->rename is set the entry is also marked INCOMPLETE, and
+ * args->index2 is bumped if it refers to this block at or below the
+ * insertion index.
+ *
+ * Always returns 0.
+ */
+STATIC int
+xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
+{
+       xfs_attr_leafblock_t *leaf;
+       xfs_attr_leaf_hdr_t *hdr;
+       xfs_attr_leaf_entry_t *entry;
+       xfs_attr_leaf_name_local_t *name_loc;
+       xfs_attr_leaf_name_remote_t *name_rmt;
+       xfs_attr_leaf_map_t *map;
+       xfs_mount_t *mp;
+       int tmp, i;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       hdr = &leaf->hdr;
+       ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
+       ASSERT((args->index >= 0)
+               && (args->index <= INT_GET(hdr->count, ARCH_CONVERT)));
+
+       /*
+        * Force open some space in the entry array and fill it in.
+        * Entries from args->index onward slide up by one slot; the
+        * logged range covers the moved entries plus the opened slot.
+        */
+       entry = &leaf->entries[args->index];
+       if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) {
+               tmp  = INT_GET(hdr->count, ARCH_CONVERT) - args->index;
+               tmp *= sizeof(xfs_attr_leaf_entry_t);
+               ovbcopy((char *)entry, (char *)(entry+1), tmp);
+               xfs_da_log_buf(args->trans, bp,
+                   XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
+       }
+       INT_MOD(hdr->count, ARCH_CONVERT, 1);
+
+       /*
+        * Allocate space for the new string (at the end of the run).
+        * The free region shrinks by the new entry's size and the name
+        * is placed at its new end (base + size).  The newentsize() call
+        * that does the shrinking also reports through "tmp" whether the
+        * value is stored locally: nonzero sets XFS_ATTR_LOCAL below.
+        */
+       map = &hdr->freemap[mapindex];
+       mp = args->trans->t_mountp;
+       ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+       ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0);
+       ASSERT(INT_GET(map->size, ARCH_CONVERT)
+                               >= xfs_attr_leaf_newentsize(args,
+                                            mp->m_sb.sb_blocksize, NULL));
+       ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+       ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0);
+       INT_MOD(map->size, ARCH_CONVERT,
+               -xfs_attr_leaf_newentsize(args, mp->m_sb.sb_blocksize, &tmp));
+       INT_SET(entry->nameidx, ARCH_CONVERT,
+                                       INT_GET(map->base, ARCH_CONVERT)
+                                     + INT_GET(map->size, ARCH_CONVERT));
+       INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+       entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
+       entry->flags |= (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0;
+       if (args->rename) {
+               entry->flags |= XFS_ATTR_INCOMPLETE;
+               if ((args->blkno2 == args->blkno) &&
+                   (args->index2 <= args->index)) {
+                       args->index2++;
+               }
+       }
+       xfs_da_log_buf(args->trans, bp,
+                         XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+       ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT)
+                                               >= INT_GET((entry-1)->hashval,
+                                                           ARCH_CONVERT)));
+       ASSERT((args->index == INT_GET(hdr->count, ARCH_CONVERT)-1) ||
+              (INT_GET(entry->hashval, ARCH_CONVERT)
+                           <= (INT_GET((entry+1)->hashval, ARCH_CONVERT))));
+
+       /*
+        * Copy the attribute name and value into the new space.
+        *
+        * For "remote" attribute values, simply note that we need to
+        * allocate space for the "remote" value.  We can't actually
+        * allocate the extents in this transaction, and we can't decide
+        * which blocks they should be as we might allocate more blocks
+        * as part of this transaction (a split operation for example).
+        */
+       if (entry->flags & XFS_ATTR_LOCAL) {
+               name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index);
+               name_loc->namelen = args->namelen;
+               INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen);
+               bcopy(args->name, (char *)name_loc->nameval, args->namelen);
+               bcopy(args->value, (char *)&name_loc->nameval[args->namelen],
+                                  INT_GET(name_loc->valuelen, ARCH_CONVERT));
+       } else {
+               name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index);
+               name_rmt->namelen = args->namelen;
+               bcopy(args->name, (char *)name_rmt->name, args->namelen);
+               entry->flags |= XFS_ATTR_INCOMPLETE;
+               /* just in case */
+               INT_SET(name_rmt->valuelen, ARCH_CONVERT, 0);
+               INT_SET(name_rmt->valueblk, ARCH_CONVERT, 0);
+               args->rmtblkno = 1;
+               args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
+       }
+       xfs_da_log_buf(args->trans, bp,
+            XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index),
+                                  xfs_attr_leaf_entsize(leaf, args->index)));
+
+       /*
+        * Update the control info for this leaf node
+        *
+        * "firstused" drops to the new name's offset if that is lower.
+        * "tmp" is then the byte offset where the entry table ended
+        * before this insertion (count has already been incremented);
+        * any free region starting exactly there has its base advanced
+        * and its size reduced by one entry-table slot.
+        */
+       if (INT_GET(entry->nameidx, ARCH_CONVERT)
+                               < INT_GET(hdr->firstused, ARCH_CONVERT)) {
+               INT_SET(hdr->firstused, ARCH_CONVERT,
+                                       INT_GET(entry->nameidx, ARCH_CONVERT));
+       }
+       ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT)
+                               >= ((INT_GET(hdr->count, ARCH_CONVERT)
+                                       * sizeof(*entry))+sizeof(*hdr)));
+       tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1)
+                                       * sizeof(xfs_attr_leaf_entry_t)
+                                       + sizeof(xfs_attr_leaf_hdr_t);
+       map = &hdr->freemap[0];
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) {
+               if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
+                       INT_MOD(map->base, ARCH_CONVERT,
+                                       sizeof(xfs_attr_leaf_entry_t));
+                       INT_MOD(map->size, ARCH_CONVERT,
+                                       -sizeof(xfs_attr_leaf_entry_t));
+               }
+       }
+       INT_MOD(hdr->usedbytes, ARCH_CONVERT,
+                               xfs_attr_leaf_entsize(leaf, args->index));
+       xfs_da_log_buf(args->trans, bp,
+               XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+       return(0);
+}
+
+/*
+ * Garbage collect a leaf attribute list block by copying it to a new buffer.
+ *
+ * The current block contents are copied into a temporary buffer from
+ * kmem_alloc(), bp->data is zeroed and given a fresh header (the "info"
+ * member copied from the old header, count/usedbytes/holes all zero, and
+ * one free region in freemap[0] running from the end of the header to
+ * "firstused"), and every entry of the saved copy is then moved back
+ * into the block with xfs_attr_leaf_moveents().  Bytes
+ * 0 .. XFS_LBSIZE()-1 of the buffer are logged in "trans" and the
+ * temporary buffer is freed.
+ */
+STATIC void
+xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
+{
+       xfs_attr_leafblock_t *leaf_s, *leaf_d;
+       xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
+       xfs_mount_t *mp;
+       char *tmpbuffer;
+
+       mp = trans->t_mountp;
+       tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
+       ASSERT(tmpbuffer != NULL);
+       bcopy(bp->data, tmpbuffer, XFS_LBSIZE(mp));
+       bzero(bp->data, XFS_LBSIZE(mp));
+
+       /*
+        * Copy basic information
+        * (leaf_s/hdr_s are the saved copy, leaf_d/hdr_d the block being
+        * rebuilt in place).
+        */
+       leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
+       leaf_d = bp->data;
+       hdr_s = &leaf_s->hdr;
+       hdr_d = &leaf_d->hdr;
+       hdr_d->info = hdr_s->info;      /* struct copy */
+       INT_SET(hdr_d->firstused, ARCH_CONVERT, XFS_LBSIZE(mp));
+       /* handle truncation gracefully */
+       if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0) {
+               INT_SET(hdr_d->firstused, ARCH_CONVERT,
+                               XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN);
+       }
+       INT_SET(hdr_d->usedbytes, ARCH_CONVERT, 0);
+       INT_SET(hdr_d->count, ARCH_CONVERT, 0);
+       hdr_d->holes = 0;
+       INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
+                                       sizeof(xfs_attr_leaf_hdr_t));
+       INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
+                               INT_GET(hdr_d->firstused, ARCH_CONVERT)
+                             - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+
+       /*
+        * Copy all entry's in the same (sorted) order,
+        * but allocate name/value pairs packed and in sequence.
+        */
+       xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
+                               (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
+
+       xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+
+       kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+}
+
+/*
+ * Redistribute the attribute list entries between two leaf nodes,
+ * taking into account the size of the new entry.
+ *
+ * NOTE: if new block is empty, then it will get the upper half of the
+ * old block.  At present, all (one) callers pass in an empty second block.
+ *
+ * This code adjusts the args->index/blkno and args->index2/blkno2 fields
+ * to match what it is doing in splitting the attribute leaf block.  Those
+ * values are used in "atomic rename" operations on attributes.  Note that
+ * the "new" and "old" values can end up in different blocks.
+ *
+ * Outputs besides the moved entries:
+ *   state->inleaf  - result of xfs_attr_leaf_figure_balance(), inverted
+ *                    when the two blocks were swapped locally;
+ *   blkN->hashval  - hashval of the last entry in each leaf;
+ *   blk2->index    - set when the insertion point lands in the 2nd block.
+ * Both buffers are logged in full (0 .. state->blocksize-1) whenever
+ * entries are moved.
+ */
+STATIC void
+xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+                                      xfs_da_state_blk_t *blk2)
+{
+       xfs_da_args_t *args;
+       xfs_da_state_blk_t *tmp_blk;
+       xfs_attr_leafblock_t *leaf1, *leaf2;
+       xfs_attr_leaf_hdr_t *hdr1, *hdr2;
+       int count, totallen, max, space, swap;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
+       leaf1 = blk1->bp->data;
+       leaf2 = blk2->bp->data;
+       ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       args = state->args;
+
+       /*
+        * Check ordering of blocks, reverse if it makes things simpler.
+        *
+        * NOTE: Given that all (current) callers pass in an empty
+        * second block, this code should never set "swap".
+        */
+       swap = 0;
+       if (xfs_attr_leaf_order(blk1->bp, blk2->bp)) {
+               tmp_blk = blk1;
+               blk1 = blk2;
+               blk2 = tmp_blk;
+               leaf1 = blk1->bp->data;
+               leaf2 = blk2->bp->data;
+               swap = 1;
+       }
+       hdr1 = &leaf1->hdr;
+       hdr2 = &leaf2->hdr;
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.  Then get
+        * the direction to copy and the number of elements to move.
+        *
+        * "inleaf" is true if the new entry should be inserted into blk1.
+        * If "swap" is also true, then reverse the sense of "inleaf".
+        *
+        * On return "count" is the number of entries that should remain
+        * in blk1 and "totallen" the name/value bytes they occupy.
+        */
+       state->inleaf = xfs_attr_leaf_figure_balance(state, blk1, blk2,
+                                                           &count, &totallen);
+       if (swap)
+               state->inleaf = !state->inleaf;
+
+       /*
+        * Move any entries required from leaf to leaf:
+        */
+       if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               /* number entries being moved */
+               count = INT_GET(hdr1->count, ARCH_CONVERT) - count;
+               space  = INT_GET(hdr1->usedbytes, ARCH_CONVERT) - totallen;
+               space += count * sizeof(xfs_attr_leaf_entry_t);
+
+               /*
+                * leaf2 is the destination, compact it if it looks tight.
+                */
+               max  = INT_GET(hdr2->firstused, ARCH_CONVERT)
+                                               - sizeof(xfs_attr_leaf_hdr_t);
+               max -= INT_GET(hdr2->count, ARCH_CONVERT)
+                                       * sizeof(xfs_attr_leaf_entry_t);
+               if (space > max) {
+                       xfs_attr_leaf_compact(args->trans, blk2->bp);
+               }
+
+               /*
+                * Move high entries from leaf1 to low end of leaf2.
+                */
+               xfs_attr_leaf_moveents(leaf1,
+                               INT_GET(hdr1->count, ARCH_CONVERT)-count,
+                               leaf2, 0, count, state->mp);
+
+               xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+               xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+       } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
+               /*
+                * I assert that since all callers pass in an empty
+                * second buffer, this code should never execute.
+                */
+
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               /* number entries being moved */
+               count -= INT_GET(hdr1->count, ARCH_CONVERT);
+               space  = totallen - INT_GET(hdr1->usedbytes, ARCH_CONVERT);
+               space += count * sizeof(xfs_attr_leaf_entry_t);
+
+               /*
+                * leaf1 is the destination, compact it if it looks tight.
+                */
+               max  = INT_GET(hdr1->firstused, ARCH_CONVERT)
+                                               - sizeof(xfs_attr_leaf_hdr_t);
+               max -= INT_GET(hdr1->count, ARCH_CONVERT)
+                                       * sizeof(xfs_attr_leaf_entry_t);
+               if (space > max) {
+                       xfs_attr_leaf_compact(args->trans, blk1->bp);
+               }
+
+               /*
+                * Move low entries from leaf2 to high end of leaf1.
+                */
+               xfs_attr_leaf_moveents(leaf2, 0, leaf1,
+                               (int)INT_GET(hdr1->count, ARCH_CONVERT), count,
+                               state->mp);
+
+               xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+               xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+       }
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        * NOTE(review): this reads entries[count-1] in both leaves, so it
+        * assumes neither leaf is left with zero entries -- confirm.
+        */
+       blk1->hashval =
+           INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count,
+                                   ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+       blk2->hashval =
+           INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count,
+                                   ARCH_CONVERT)-1].hashval, ARCH_CONVERT);
+
+       /*
+        * Adjust the expected index for insertion.
+        * NOTE: this code depends on the (current) situation that the
+        * second block was originally empty.
+        *
+        * If the insertion point moved to the 2nd block, we must adjust
+        * the index.  We must also track the entry just following the
+        * new entry for use in an "atomic rename" operation, that entry
+        * is always the "old" entry and the "new" entry is what we are
+        * inserting.  The index/blkno fields refer to the "old" entry,
+        * while the index2/blkno2 fields refer to the "new" entry.
+        *
+        * Three cases, comparing blk1->index with leaf1's new count:
+        *   greater - insertion point is in blk2; rebase the index;
+        *   equal   - insertion point is on the boundary; "inleaf"
+        *             decides which block takes the new entry;
+        *   less    - insertion point stays in blk1.
+        */
+       if (blk1->index > INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+               ASSERT(state->inleaf == 0);
+               blk2->index = blk1->index
+                               - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+               args->index = args->index2 = blk2->index;
+               args->blkno = args->blkno2 = blk2->blkno;
+       } else if (blk1->index == INT_GET(leaf1->hdr.count, ARCH_CONVERT)) {
+               if (state->inleaf) {
+                       args->index = blk1->index;
+                       args->blkno = blk1->blkno;
+                       args->index2 = 0;
+                       args->blkno2 = blk2->blkno;
+               } else {
+                       blk2->index = blk1->index
+                                   - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+                       args->index = args->index2 = blk2->index;
+                       args->blkno = args->blkno2 = blk2->blkno;
+               }
+       } else {
+               ASSERT(state->inleaf == 1);
+               args->index = args->index2 = blk1->index;
+               args->blkno = args->blkno2 = blk1->blkno;
+       }
+}
+
+/*
+ * Examine entries until we reduce the absolute difference in
+ * byte usage between the two blocks to a minimum.
+ * GROT: Is this really necessary?  With other than a 512 byte blocksize,
+ * GROT: there will always be enough room in either block for a new entry.
+ * GROT: Do a double-split for this case?
+ *
+ * Walks the entries of blk1 and then blk2 in order, keeping a running
+ * byte total (entry-table slot plus name/value size per entry), and
+ * stops at the first entry that would move the total further from
+ * "half" than the previous step was.  The new entry described by
+ * state->args is counted in when the walk reaches position blk1->index.
+ *
+ * Outputs:
+ *   *countarg     - number of existing entries counted before stopping;
+ *   *usedbytesarg - name/value bytes of those entries only (entry-table
+ *                   slots and the new entry's contribution are removed);
+ *   return value  - nonzero if the new entry was counted into the lower
+ *                   block, zero otherwise.
+ */
+STATIC int
+xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
+                                   xfs_da_state_blk_t *blk1,
+                                   xfs_da_state_blk_t *blk2,
+                                   int *countarg, int *usedbytesarg)
+{
+       xfs_attr_leafblock_t *leaf1, *leaf2;
+       xfs_attr_leaf_hdr_t *hdr1, *hdr2;
+       xfs_attr_leaf_entry_t *entry;
+       int count, max, index, totallen, half;
+       int lastdelta, foundit, tmp;
+
+       /*
+        * Set up environment.
+        */
+       leaf1 = blk1->bp->data;
+       leaf2 = blk2->bp->data;
+       hdr1 = &leaf1->hdr;
+       hdr2 = &leaf2->hdr;
+       foundit = 0;
+       totallen = 0;
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.
+        *
+        * "half" is half of: the entry-table slots for all existing
+        * entries plus one, the used bytes of both blocks, and the size
+        * of the new entry.  "lastdelta" starts at the block size so the
+        * first comparison cannot stop the walk prematurely.
+        */
+       max = INT_GET(hdr1->count, ARCH_CONVERT)
+                       + INT_GET(hdr2->count, ARCH_CONVERT);
+       half  = (max+1) * sizeof(*entry);
+       half += INT_GET(hdr1->usedbytes, ARCH_CONVERT)
+                               + INT_GET(hdr2->usedbytes, ARCH_CONVERT)
+                               + xfs_attr_leaf_newentsize(state->args,
+                                                    state->blocksize, NULL);
+       half /= 2;
+       lastdelta = state->blocksize;
+       entry = &leaf1->entries[0];
+       for (count = index = 0; count < max; entry++, index++, count++) {
+
+#define XFS_ATTR_ABS(A)        (((A) < 0) ? -(A) : (A))
+               /*
+                * The new entry is in the first block, account for it.
+                */
+               if (count == blk1->index) {
+                       tmp = totallen + sizeof(*entry) +
+                               xfs_attr_leaf_newentsize(state->args,
+                                                        state->blocksize,
+                                                        NULL);
+                       if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+                               break;
+                       lastdelta = XFS_ATTR_ABS(half - tmp);
+                       totallen = tmp;
+                       foundit = 1;
+               }
+
+               /*
+                * Wrap around into the second block if necessary.
+                * From here on "leaf1" refers to the second block's leaf.
+                */
+               if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
+                       leaf1 = leaf2;
+                       entry = &leaf1->entries[0];
+                       index = 0;
+               }
+
+               /*
+                * Figure out if next leaf entry would be too much.
+                */
+               tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1,
+                                                                       index);
+               if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+                       break;
+               lastdelta = XFS_ATTR_ABS(half - tmp);
+               totallen = tmp;
+#undef XFS_ATTR_ABS
+       }
+
+       /*
+        * Calculate the number of usedbytes that will end up in lower block.
+        * If new entry not in lower block, fix up the count.
+        * The entry-table slots, and the new entry's share if it was
+        * counted, are taken back out of the running total.
+        */
+       totallen -= count * sizeof(*entry);
+       if (foundit) {
+               totallen -= sizeof(*entry) + 
+                               xfs_attr_leaf_newentsize(state->args,
+                                                        state->blocksize,
+                                                        NULL);
+       }
+
+       *countarg = count;
+       *usedbytesarg = totallen;
+       return(foundit);
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ *
+ * The verdict is handed back through *action; the function's own return
+ * value is 0, or the error from xfs_da_read_buf()/xfs_da_path_shift()
+ * (in which case *action is not written).
+ *
+ * If the current block is over 50% full, don't try to join it: *action = 0.
+ * If the block is empty, fill in the state structure and set *action = 2.
+ * If it can be collapsed, fill in the state structure and set *action = 1.
+ * If nothing can be done, set *action = 0.
+ *
+ * The block examined is the last active one in state->path.
+ *
+ * GROT: allow for INCOMPLETE entries in calculation.
+ */
+int
+xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
+{
+       xfs_attr_leafblock_t *leaf;
+       xfs_da_state_blk_t *blk;
+       xfs_da_blkinfo_t *info;
+       int count, bytes, forward, error, retval, i;
+       xfs_dablk_t blkno;
+       xfs_dabuf_t *bp;
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       info = blk->bp->data;
+       ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+       leaf = (xfs_attr_leafblock_t *)info;
+       count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       bytes = sizeof(xfs_attr_leaf_hdr_t) +
+               count * sizeof(xfs_attr_leaf_entry_t) +
+               INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+       if (bytes > (state->blocksize >> 1)) {
+               *action = 0;    /* blk over 50%, dont try to join */
+               return(0);
+       }
+
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                * A nonzero "retval" from the path shift means no action.
+                */
+               forward = (INT_GET(info->forw, ARCH_CONVERT) != 0);
+               bcopy(&state->path, &state->altpath, sizeof(state->path));
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error)
+                       return(error);
+               if (retval) {
+                       *action = 0;
+               } else {
+                       *action = 2;
+               }
+               return(0);
+       }
+
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink an attribute list over time.
+        *
+        * For each candidate, "bytes" starts at 75% of the block size
+        * and has both blocks' usedbytes, the combined entry table and
+        * one header subtracted; a non-negative remainder is a fit.
+        * A sibling pointer of 0 is skipped.  Leaving the loop with
+        * i >= 2 means neither sibling qualified.
+        */
+       /* start with smaller blk num */
+       forward = (INT_GET(info->forw, ARCH_CONVERT)
+                                       < INT_GET(info->back, ARCH_CONVERT));
+       for (i = 0; i < 2; forward = !forward, i++) {
+               if (forward)
+                       blkno = INT_GET(info->forw, ARCH_CONVERT);
+               else
+                       blkno = INT_GET(info->back, ARCH_CONVERT);
+               if (blkno == 0)
+                       continue;
+               error = xfs_da_read_buf(state->args->trans, state->args->dp,
+                                       blkno, -1, &bp, XFS_ATTR_FORK);
+               if (error)
+                       return(error);
+               ASSERT(bp != NULL);
+
+               leaf = (xfs_attr_leafblock_t *)info;
+               count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               bytes  = state->blocksize - (state->blocksize>>2);
+               bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+               leaf = bp->data;
+               ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+               count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT);
+               bytes -= count * sizeof(xfs_attr_leaf_entry_t);
+               bytes -= sizeof(xfs_attr_leaf_hdr_t);
+               xfs_da_brelse(state->args->trans, bp);
+               if (bytes >= 0)
+                       break;  /* fits with at least 25% to spare */
+       }
+       if (i >= 2) {
+               *action = 0;
+               return(0);
+       }
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        * If the chosen sibling has the lower number, altpath is shifted
+        * onto it; otherwise path itself is shifted onto the sibling.
+        */
+       bcopy(&state->path, &state->altpath, sizeof(state->path));
+       if (blkno < blk->blkno) {
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+       } else {
+               error = xfs_da_path_shift(state, &state->path, forward,
+                                                0, &retval);
+       }
+       if (error)
+               return(error);
+       if (retval) {
+               *action = 0;
+       } else {
+               *action = 1;
+       }
+       return(0);
+}
+
+/*
+ * Move all the attribute list entries from drop_leaf into save_leaf.
+ *
+ * state:    da-btree operation state; supplies the mount pointer, the
+ *           block size and the transaction used for logging.
+ * drop_blk: the leaf being emptied.  Its hashval is set to the hash of
+ *           its last entry, read before any entry is moved.
+ * save_blk: the leaf receiving the entries.  Its whole buffer is logged
+ *           and its hashval is set to the hash of its last entry after
+ *           the merge.
+ *
+ * NOTE(review): drop_leaf (before the move) and save_leaf (after it) are
+ * indexed at [count - 1] without a guard, so a zero count is presumably
+ * excluded by the callers -- TODO confirm.
+ */
+void
+xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+                                      xfs_da_state_blk_t *save_blk)
+{
+       xfs_attr_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
+       xfs_attr_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
+       xfs_mount_t *mp;
+       char *tmpbuffer;
+
+       /*
+        * Set up environment.
+        */
+       mp = state->mp;
+       ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+       drop_leaf = drop_blk->bp->data;
+       save_leaf = save_blk->bp->data;
+       ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       drop_hdr = &drop_leaf->hdr;
+       save_hdr = &save_leaf->hdr;
+
+       /*
+        * Save last hashval from dying block for later Btree fixup.
+        * This has to happen before the moves below, which empty the
+        * drop leaf.
+        */
+       drop_blk->hashval =
+               INT_GET(drop_leaf->entries[INT_GET(drop_leaf->hdr.count,
+                                               ARCH_CONVERT)-1].hashval,
+                                                               ARCH_CONVERT);
+
+       /*
+        * Check if we need a temp buffer, or can we do it in place.
+        * Note that we don't check "leaf" for holes because we will
+        * always be dropping it, toosmall() decided that for us already.
+        */
+       if (save_hdr->holes == 0) {
+               /*
+                * dest leaf has no holes, so we add there.  May need
+                * to make some room in the entry array.
+                *
+                * xfs_attr_leaf_order() is nonzero when the drop leaf
+                * sorts ahead of the save leaf: its entries then go in
+                * front (index 0), otherwise they are appended after the
+                * entries already in the save leaf.
+                */
+               if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
+                       xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0,
+                            (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+               } else {
+                       xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf,
+                                 INT_GET(save_hdr->count, ARCH_CONVERT),
+                                 (int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+                                 mp);
+               }
+       } else {
+               /*
+                * Destination has holes, so we make a temporary copy
+                * of the leaf and add them both to that.
+                */
+               tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
+               ASSERT(tmpbuffer != NULL);
+               bzero(tmpbuffer, state->blocksize);
+               /*
+                * Build an empty leaf in the scratch buffer: same info
+                * header as the save leaf, no entries, no used bytes.
+                */
+               tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer;
+               tmp_hdr = &tmp_leaf->hdr;
+               tmp_hdr->info = save_hdr->info; /* struct copy */
+               INT_SET(tmp_hdr->count, ARCH_CONVERT, 0);
+               INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
+               /*
+                * NOTE(review): a read-back of 0 presumably means the
+                * block size did not fit in the firstused field; the
+                * value is then backed off by one name alignment unit
+                * -- confirm against the on-disk header definition.
+                */
+               if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0) {
+                       INT_SET(tmp_hdr->firstused, ARCH_CONVERT,
+                               state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN);
+               }
+               INT_SET(tmp_hdr->usedbytes, ARCH_CONVERT, 0);
+               /*
+                * Fill the scratch leaf in hash order: whichever leaf
+                * sorts first is moved in at index 0, the other one is
+                * appended at the scratch leaf's count as it stands after
+                * the first move.
+                */
+               if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) {
+                       xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
+                               (int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+                               mp);
+                       xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf,
+                                 INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+                                (int)INT_GET(save_hdr->count, ARCH_CONVERT),
+                                mp);
+               } else {
+                       xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
+                               (int)INT_GET(save_hdr->count, ARCH_CONVERT),
+                               mp);
+                       xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf,
+                               INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+                               (int)INT_GET(drop_hdr->count, ARCH_CONVERT),
+                               mp);
+               }
+               /* The merged scratch leaf replaces the save leaf wholesale. */
+               bcopy((char *)tmp_leaf, (char *)save_leaf, state->blocksize);
+               kmem_free(tmpbuffer, state->blocksize);
+       }
+
+       /* Log the entire save buffer; only this buffer is logged here. */
+       xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+                                          state->blocksize - 1);
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        */
+       save_blk->hashval =
+               INT_GET(save_leaf->entries[INT_GET(save_leaf->hdr.count,
+                                               ARCH_CONVERT)-1].hashval,
+                                                               ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Move the indicated entries from one leaf to another.
+ * NOTE: this routine modifies both source and destination leaves.
+ *
+ * leaf_s, start_s: source leaf and index of the first entry to move.
+ * leaf_d, start_d: destination leaf and index at which the entries are
+ *                  inserted; entries already at or above start_d are
+ *                  shifted up by "count" slots first.
+ * count:           number of entries to move; 0 is a no-op.
+ * mp:              mount point, only referenced by the ASSERT()s.
+ *
+ * The name/value bytes of every moved entry are packed downwards from the
+ * destination's "firstused" mark and zeroed in the source.  On return the
+ * source entry table has been closed up, the destination freemap has been
+ * rebuilt as one region between the end of its entry table and its
+ * "firstused" mark, and the source is flagged as having holes.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
+                       xfs_attr_leafblock_t *leaf_d, int start_d,
+                       int count, xfs_mount_t *mp)
+{
+       xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
+       xfs_attr_leaf_entry_t *entry_s, *entry_d;
+       int desti, tmp, i;
+
+       /*
+        * Check for nothing to do.
+        */
+       if (count == 0)
+               return;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+       hdr_s = &leaf_s->hdr;
+       hdr_d = &leaf_d->hdr;
+       ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0)
+                               && (INT_GET(hdr_s->count, ARCH_CONVERT)
+                                               < (XFS_LBSIZE(mp)/8)));
+       ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
+               ((INT_GET(hdr_s->count, ARCH_CONVERT)
+                                       * sizeof(*entry_s))+sizeof(*hdr_s)));
+       ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
+       ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
+               ((INT_GET(hdr_d->count, ARCH_CONVERT)
+                                       * sizeof(*entry_d))+sizeof(*hdr_d)));
+
+       ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
+       ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
+       ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
+
+       /*
+        * Move the entries in the destination leaf up to make a hole?
+        */
+       if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
+               tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
+               tmp *= sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &leaf_d->entries[start_d];
+               entry_d = &leaf_d->entries[start_d + count];
+               ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+       }
+
+       /*
+        * Copy all entries in the same (sorted) order,
+        * but allocate attribute info packed and in sequence.
+        */
+       entry_s = &leaf_s->entries[start_s];
+       entry_d = &leaf_d->entries[start_d];
+       desti = start_d;
+       for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
+               ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT)
+                               >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
+               /* bytes occupied in the source block by this entry's name area */
+               tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
+#ifdef GROT
+               /*
+                * Code to drop INCOMPLETE entries.  Difficult to use as we
+                * may also need to change the insertion index.  Code turned
+                * off for 6.2, should be revisited later.
+                */
+               if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
+                       bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+                       INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
+                       INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+                       entry_d--;      /* to compensate for ++ in loop hdr */
+                       desti--;
+                       if ((start_s + i) < offset)
+                               result++;       /* insertion index adjustment */
+               } else {
+#endif /* GROT */
+                       /*
+                        * Carve space for the name area immediately below
+                        * the destination's current "firstused" mark and
+                        * point the new entry at it.
+                        */
+                       INT_MOD(hdr_d->firstused, ARCH_CONVERT, -tmp);
+                       INT_SET(entry_d->hashval, ARCH_CONVERT,
+                                   INT_GET(entry_s->hashval, ARCH_CONVERT));
+                       INT_SET(entry_d->nameidx, ARCH_CONVERT,
+                                               INT_GET(hdr_d->firstused,
+                                                               ARCH_CONVERT));
+                       entry_d->flags = entry_s->flags;
+                       ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp
+                                                       <= XFS_LBSIZE(mp));
+                       ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i),
+                             XFS_ATTR_LEAF_NAME(leaf_d, desti), tmp);
+                       ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp
+                                                       <= XFS_LBSIZE(mp));
+                       bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp);
+                       /*
+                        * Both headers are adjusted per entry, so after the
+                        * loop hdr_s->count no longer includes the moved
+                        * entries.
+                        */
+                       INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp);
+                       INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp);
+                       INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+                       INT_MOD(hdr_d->count, ARCH_CONVERT, 1);
+                       tmp = INT_GET(hdr_d->count, ARCH_CONVERT)
+                                               * sizeof(xfs_attr_leaf_entry_t)
+                                               + sizeof(xfs_attr_leaf_hdr_t);
+                       ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
+#ifdef GROT
+               }
+#endif /* GROT */
+       }
+
+       /*
+        * Zero out the entries we just copied.
+        */
+       if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
+               /*
+                * The moved entries were the tail of the source table:
+                * nothing is left above them, just clear their slots.
+                */
+               tmp = count * sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &leaf_s->entries[start_s];
+               ASSERT(((char *)entry_s + tmp) <=
+                      ((char *)leaf_s + XFS_LBSIZE(mp)));
+               bzero((char *)entry_s, tmp);
+       } else {
+               /*
+                * Move the remaining entries down to fill the hole,
+                * then zero the entries at the top.
+                *
+                * hdr_s->count was already reduced by "count" in the loop
+                * above, so the number of entries still sitting above the
+                * hole is (hdr_s->count - start_s).  Subtracting "count" a
+                * second time would copy too few entries (or a negative
+                * amount) and the bzero() below would then wipe entries
+                * that are still live.
+                */
+               tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - start_s;
+               tmp *= sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &leaf_s->entries[start_s + count];
+               entry_d = &leaf_s->entries[start_s];
+               ovbcopy((char *)entry_s, (char *)entry_d, tmp);
+
+               tmp = count * sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &leaf_s->entries[INT_GET(hdr_s->count,
+                                                       ARCH_CONVERT)];
+               ASSERT(((char *)entry_s + tmp) <=
+                      ((char *)leaf_s + XFS_LBSIZE(mp)));
+               bzero((char *)entry_s, tmp);
+       }
+
+       /*
+        * Fill in the freemap information: a single free region running
+        * from the end of the destination entry table up to "firstused",
+        * with the other two freemap slots cleared.
+        */
+       INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT,
+                                       sizeof(xfs_attr_leaf_hdr_t));
+       INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT,
+                               INT_GET(hdr_d->count, ARCH_CONVERT)
+                                       * sizeof(xfs_attr_leaf_entry_t));
+       INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT,
+                               INT_GET(hdr_d->firstused, ARCH_CONVERT)
+                             - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+       INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, 0);
+       INT_SET(hdr_d->freemap[2].base, ARCH_CONVERT, 0);
+       INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, 0);
+       INT_SET(hdr_d->freemap[2].size, ARCH_CONVERT, 0);
+       hdr_s->holes = 1;       /* leaf may not be compact */
+}
+
+/*
+ * Decide the relative "order" of two attribute leaf blocks.
+ *
+ * Returns 1 when leaf2 should go before leaf1: both blocks hold at least
+ * one entry and either the first or the last hash value of leaf2 is lower
+ * than the corresponding hash value of leaf1.  Returns 0 in every other
+ * case, including when either block is empty.
+ */
+int
+xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+{
+       xfs_attr_leafblock_t *leaf1 = leaf1_bp->data;
+       xfs_attr_leafblock_t *leaf2 = leaf2_bp->data;
+       int nents1, nents2;
+
+       ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC) &&
+              (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC));
+
+       /* An empty block offers no hash value to compare against. */
+       nents1 = INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+       if (nents1 <= 0)
+               return(0);
+       nents2 = INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+       if (nents2 <= 0)
+               return(0);
+
+       /* Lowest hash values first ... */
+       if (INT_GET(leaf2->entries[0].hashval, ARCH_CONVERT) <
+           INT_GET(leaf1->entries[0].hashval, ARCH_CONVERT))
+               return(1);
+
+       /* ... then the highest ones. */
+       if (INT_GET(leaf2->entries[nents2 - 1].hashval, ARCH_CONVERT) <
+           INT_GET(leaf1->entries[nents1 - 1].hashval, ARCH_CONVERT))
+               return(1);
+
+       return(0);
+}
+
+/*
+ * Fetch the hash value of the last entry in a leaf block.
+ *
+ * If "count" is non-NULL the number of entries in the block is stored
+ * through it.  An empty block yields a hash value of 0.
+ */
+xfs_dahash_t
+xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+{
+       xfs_attr_leafblock_t *leaf = bp->data;
+       int nents;
+
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+
+       nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       if (count != NULL)
+               *count = nents;
+
+       return(nents
+               ? INT_GET(leaf->entries[nents - 1].hashval, ARCH_CONVERT)
+               : 0);
+}
+
+/*
+ * Return the number of bytes that entry "index" of the leaf occupies for
+ * its name area.  A local entry is sized from its name length and value
+ * length, a remote entry from its name length alone (whether local or
+ * remote, only bytes in this block are counted).
+ */
+int
+xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
+{
+       xfs_attr_leaf_name_local_t *lname;
+       xfs_attr_leaf_name_remote_t *rname;
+
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               == XFS_ATTR_LEAF_MAGIC);
+
+       /* Remote value: only the name structure lives in this block. */
+       if (!(leaf->entries[index].flags & XFS_ATTR_LOCAL)) {
+               rname = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index);
+               return(XFS_ATTR_LEAF_ENTSIZE_REMOTE(rname->namelen));
+       }
+
+       /* Local value: name and value are both stored here. */
+       lname = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index);
+       return(XFS_ATTR_LEAF_ENTSIZE_LOCAL(lname->namelen,
+                               INT_GET(lname->valuelen, ARCH_CONVERT)));
+}
+
+/*
+ * Work out how many bytes a new attribute would need inside a leaf block
+ * (whether local or remote, only bytes in this block are counted).
+ *
+ * The attribute is treated as "local" when its local-format size is
+ * strictly below XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize); otherwise the
+ * remote-format size is returned.  As a side effect the decision is
+ * reported through "local" (1 = local, 0 = remote) when that pointer is
+ * non-NULL.
+ */
+int
+xfs_attr_leaf_newentsize(xfs_da_args_t *args, int blocksize, int *local)
+{
+       int nbytes, fits_local;
+
+       nbytes = XFS_ATTR_LEAF_ENTSIZE_LOCAL(args->namelen, args->valuelen);
+       fits_local = (nbytes < XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize));
+
+       /* Too big to keep in the leaf: size the remote form instead. */
+       if (!fits_local)
+               nbytes = XFS_ATTR_LEAF_ENTSIZE_REMOTE(args->namelen);
+
+       if (local)
+               *local = fits_local;
+       return(nbytes);
+}
diff --git a/libxfs/xfs_bit.c b/libxfs/xfs_bit.c
new file mode 100644 (file)
index 0000000..52ab69a
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS bit manipulation routines, used in non-realtime code.
+ */
+
+#include <xfs.h>
+
+/*
+ * Index of low bit number in byte, -1 for none set, 0..7 otherwise.
+ *
+ * The table is indexed by the byte value itself (0x00 .. 0xff); each row
+ * below covers eight consecutive byte values, as noted on its right.
+ *
+ * NOTE(review): the element type is plain char, whose signedness is
+ * implementation-defined in C, so entry 0 reads back as -1 only where
+ * char is signed.  The lookups in this file only index the table with a
+ * byte already known to be non-zero, so they never read entry 0.
+ */
+const char xfs_lowbit[256] = {
+       -1, 0, 1, 0, 2, 0, 1, 0,                        /* 00 .. 07 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 08 .. 0f */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* 10 .. 17 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 18 .. 1f */
+       5, 0, 1, 0, 2, 0, 1, 0,                 /* 20 .. 27 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 28 .. 2f */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* 30 .. 37 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 38 .. 3f */
+       6, 0, 1, 0, 2, 0, 1, 0,                 /* 40 .. 47 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 48 .. 4f */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* 50 .. 57 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 58 .. 5f */
+       5, 0, 1, 0, 2, 0, 1, 0,                 /* 60 .. 67 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 68 .. 6f */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* 70 .. 77 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 78 .. 7f */
+       7, 0, 1, 0, 2, 0, 1, 0,                 /* 80 .. 87 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 88 .. 8f */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* 90 .. 97 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* 98 .. 9f */
+       5, 0, 1, 0, 2, 0, 1, 0,                 /* a0 .. a7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* a8 .. af */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* b0 .. b7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* b8 .. bf */
+       6, 0, 1, 0, 2, 0, 1, 0,                 /* c0 .. c7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* c8 .. cf */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* d0 .. d7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* d8 .. df */
+       5, 0, 1, 0, 2, 0, 1, 0,                 /* e0 .. e7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* e8 .. ef */
+       4, 0, 1, 0, 2, 0, 1, 0,                 /* f0 .. f7 */
+       3, 0, 1, 0, 2, 0, 1, 0,                 /* f8 .. ff */
+};
+
+/*
+ * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
+ *
+ * The table is indexed by the byte value itself (0x00 .. 0xff); each row
+ * below covers eight consecutive byte values, as noted on its right.
+ *
+ * NOTE(review): the element type is plain char, whose signedness is
+ * implementation-defined in C, so entry 0 reads back as -1 only where
+ * char is signed.  The lookups in this file only index the table with a
+ * byte already known to be non-zero, so they never read entry 0.
+ */
+const char xfs_highbit[256] = {
+       -1, 0, 1, 1, 2, 2, 2, 2,                        /* 00 .. 07 */
+       3, 3, 3, 3, 3, 3, 3, 3,                 /* 08 .. 0f */
+       4, 4, 4, 4, 4, 4, 4, 4,                 /* 10 .. 17 */
+       4, 4, 4, 4, 4, 4, 4, 4,                 /* 18 .. 1f */
+       5, 5, 5, 5, 5, 5, 5, 5,                 /* 20 .. 27 */
+       5, 5, 5, 5, 5, 5, 5, 5,                 /* 28 .. 2f */
+       5, 5, 5, 5, 5, 5, 5, 5,                 /* 30 .. 37 */
+       5, 5, 5, 5, 5, 5, 5, 5,                 /* 38 .. 3f */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 40 .. 47 */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 48 .. 4f */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 50 .. 57 */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 58 .. 5f */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 60 .. 67 */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 68 .. 6f */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 70 .. 77 */
+       6, 6, 6, 6, 6, 6, 6, 6,                 /* 78 .. 7f */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* 80 .. 87 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* 88 .. 8f */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* 90 .. 97 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* 98 .. 9f */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* a0 .. a7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* a8 .. af */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* b0 .. b7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* b8 .. bf */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* c0 .. c7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* c8 .. cf */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* d0 .. d7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* d8 .. df */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* e0 .. e7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* e8 .. ef */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* f0 .. f7 */
+       7, 7, 7, 7, 7, 7, 7, 7,                 /* f8 .. ff */
+};
+
+/*
+ * Count of bits set in byte, 0..8.
+ *
+ * The table is indexed by the byte value itself (0x00 .. 0xff); each row
+ * below covers eight consecutive byte values, as noted on its right.
+ */
+const char xfs_countbit[256] = {
+       0, 1, 1, 2, 1, 2, 2, 3,                 /* 00 .. 07 */
+       1, 2, 2, 3, 2, 3, 3, 4,                 /* 08 .. 0f */
+       1, 2, 2, 3, 2, 3, 3, 4,                 /* 10 .. 17 */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 18 .. 1f */
+       1, 2, 2, 3, 2, 3, 3, 4,                 /* 20 .. 27 */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 28 .. 2f */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 30 .. 37 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* 38 .. 3f */
+       1, 2, 2, 3, 2, 3, 3, 4,                 /* 40 .. 47 */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 48 .. 4f */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 50 .. 57 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* 58 .. 5f */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 60 .. 67 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* 68 .. 6f */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* 70 .. 77 */
+       4, 5, 5, 6, 5, 6, 6, 7,                 /* 78 .. 7f */
+       1, 2, 2, 3, 2, 3, 3, 4,                 /* 80 .. 87 */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 88 .. 8f */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* 90 .. 97 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* 98 .. 9f */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* a0 .. a7 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* a8 .. af */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* b0 .. b7 */
+       4, 5, 5, 6, 5, 6, 6, 7,                 /* b8 .. bf */
+       2, 3, 3, 4, 3, 4, 4, 5,                 /* c0 .. c7 */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* c8 .. cf */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* d0 .. d7 */
+       4, 5, 5, 6, 5, 6, 6, 7,                 /* d8 .. df */
+       3, 4, 4, 5, 4, 5, 5, 6,                 /* e0 .. e7 */
+       4, 5, 5, 6, 5, 6, 6, 7,                 /* e8 .. ef */
+       4, 5, 5, 6, 5, 6, 6, 7,                 /* f0 .. f7 */
+       5, 6, 6, 7, 6, 7, 7, 8,                 /* f8 .. ff */
+};
+
+/*
+ * xfs_highbit32: return the number (0..31) of the most significant bit
+ * set in a 32-bit argument, or -1 if the argument is zero.
+ */
+int
+xfs_highbit32(
+       __uint32_t      v)
+{
+       int             shift;
+
+       if (v == 0)
+               return -1;
+
+       /* Locate the most significant non-zero byte ... */
+       if (v & 0xff000000) {
+               shift = 24;
+       } else if (v & 0x00ff0000) {
+               shift = 16;
+       } else if (v & 0x0000ff00) {
+               shift = 8;
+       } else {
+               shift = 0;
+       }
+
+       /* ... and let the per-byte table finish the job. */
+       return shift + xfs_highbit[(v >> shift) & 0xff];
+}
+
+/*
+ * xfs_lowbit64: return the number (0..63) of the least significant bit
+ * set in a 64-bit argument, or -1 if the argument is zero.
+ *
+ * With XFS_64 set the search runs on the 64-bit value directly; otherwise
+ * it runs on whichever 32-bit half holds the lowest set bit.
+ */
+int
+xfs_lowbit64(
+       __uint64_t      v)
+{
+       int             i;
+#if XFS_64
+       if (v == 0)
+               return -1;
+
+       /* Locate the least significant non-zero byte. */
+       if (v & 0x00000000000000ff) {
+               i = 0;
+       } else if (v & 0x000000000000ff00) {
+               i = 8;
+       } else if (v & 0x0000000000ff0000) {
+               i = 16;
+       } else if (v & 0x00000000ff000000) {
+               i = 24;
+       } else if (v & 0x000000ff00000000) {
+               i = 32;
+       } else if (v & 0x0000ff0000000000) {
+               i = 40;
+       } else if (v & 0x00ff000000000000) {
+               i = 48;
+       } else {
+               i = 56;
+       }
+       return i + xfs_lowbit[(v >> i) & 0xff];
+#else
+       __uint32_t      word;
+       int             base;
+
+       /* Prefer the low half; fall back to the high half if it is empty. */
+       word = (__uint32_t)v;
+       base = 0;
+       if (word == 0) {
+               word = (__uint32_t)(v >> 32);
+               base = 32;
+       }
+       if (word == 0)
+               return -1;
+
+       /* Locate the least significant non-zero byte of that half. */
+       if (word & 0x000000ff) {
+               i = 0;
+       } else if (word & 0x0000ff00) {
+               i = 8;
+       } else if (word & 0x00ff0000) {
+               i = 16;
+       } else {
+               i = 24;
+       }
+       return base + i + xfs_lowbit[(word >> i) & 0xff];
+#endif
+}
+
+/*
+ * xfs_highbit64: return the number (0..63) of the most significant bit
+ * set in a 64-bit argument, or -1 if the argument is zero.
+ *
+ * With XFS_64 set the search runs on the 64-bit value directly; otherwise
+ * it runs on whichever 32-bit half holds the highest set bit.
+ */
+int
+xfs_highbit64(
+       __uint64_t      v)
+{
+       int             i;
+#if  XFS_64
+       if (v == 0)
+               return -1;
+
+       /* Locate the most significant non-zero byte. */
+       if (v & 0xff00000000000000) {
+               i = 56;
+       } else if (v & 0x00ff000000000000) {
+               i = 48;
+       } else if (v & 0x0000ff0000000000) {
+               i = 40;
+       } else if (v & 0x000000ff00000000) {
+               i = 32;
+       } else if (v & 0x00000000ff000000) {
+               i = 24;
+       } else if (v & 0x0000000000ff0000) {
+               i = 16;
+       } else if (v & 0x000000000000ff00) {
+               i = 8;
+       } else {
+               i = 0;
+       }
+       return i + xfs_highbit[(v >> i) & 0xff];
+#else
+       __uint32_t      word;
+       int             base;
+
+       /* Prefer the high half; fall back to the low half if it is empty. */
+       word = (__uint32_t)(v >> 32);
+       base = 32;
+       if (word == 0) {
+               word = (__uint32_t)v;
+               base = 0;
+       }
+       if (word == 0)
+               return -1;
+
+       /* Locate the most significant non-zero byte of that half. */
+       if (word & 0xff000000) {
+               i = 24;
+       } else if (word & 0x00ff0000) {
+               i = 16;
+       } else if (word & 0x0000ff00) {
+               i = 8;
+       } else {
+               i = 0;
+       }
+       return base + i + xfs_highbit[(word >> i) & 0xff];
+#endif
+}
diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c
new file mode 100644 (file)
index 0000000..88e597e
--- /dev/null
@@ -0,0 +1,4511 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * NOTE(review): presumably the zone from which bmap free-list items are
+ * allocated; nothing in this part of the file uses it -- confirm against
+ * the code that initializes and consumes it.
+ */
+xfs_zone_t             *xfs_bmap_free_item_zone;
+
+/*
+ * Called by xfs_bmapi to update extent list structure and the btree
+ * after allocating space (or doing a delayed allocation).
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to put in extent list */
+       xfs_fsblock_t           *first, /* pointer to firstblock variable */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork, /* data or attr fork */
+       int                     rsvd)   /* OK to use reserved data blocks */
+{
+       xfs_btree_cur_t         *cur;   /* btree cursor or null */
+       xfs_filblks_t           da_new; /* new count del alloc blocks used */
+       xfs_filblks_t           da_old; /* old count del alloc blocks used */
+       int                     error;  /* error return value */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_add_extent";
+#endif
+       xfs_ifork_t             *ifp;   /* inode fork ptr */
+       int                     logflags; /* returned value */
+       xfs_extnum_t            nextents; /* number of extents in file now */
+
+       XFS_STATS_INC(xs_add_exlist);
+       cur = *curp;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT(idx <= nextents);
+       da_old = da_new = 0;
+       error = 0;
+       /*
+        * This is the first extent added to a new/empty file.
+        * Special case this one, so other routines get to assume there are
+        * already extents in the list.
+        */
+       if (nextents == 0) {
+               xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
+                       NULL, whichfork);
+               xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork);
+               ASSERT(cur == NULL);
+               ifp->if_lastex = 0;
+               if (!ISNULLSTARTBLOCK(new->br_startblock)) {
+                       XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+                       logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+               } else
+                       logflags = 0;
+       }
+       /*
+        * Any kind of new delayed allocation goes here.
+        */
+       else if (ISNULLSTARTBLOCK(new->br_startblock)) {
+               if (cur)
+                       ASSERT((cur->bc_private.b.flags &
+                               XFS_BTCUR_BPRV_WASDEL) == 0);
+               if (error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new,
+                               &logflags, rsvd))
+                       goto done;
+       }
+       /*
+        * Real allocation off the end of the file.
+        */
+       else if (idx == nextents) {
+               if (cur)
+                       ASSERT((cur->bc_private.b.flags &
+                               XFS_BTCUR_BPRV_WASDEL) == 0);
+               if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
+                               &logflags, whichfork))
+                       goto done;
+       } else {
+               xfs_bmbt_irec_t prev;   /* old extent at offset idx */
+
+               /*
+                * Get the record referred to by idx.
+                */
+               xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev);
+               /*
+                * If it's a real allocation record, and the new allocation ends
+                * after the start of the referred to record, then we're filling
+                * in a delayed or unwritten allocation with a real one, or
+                * converting real back to unwritten.
+                */
+               if (!ISNULLSTARTBLOCK(new->br_startblock) &&
+                   new->br_startoff + new->br_blockcount > prev.br_startoff) {
+                       if (prev.br_state != XFS_EXT_UNWRITTEN && 
+                           ISNULLSTARTBLOCK(prev.br_startblock)) {
+                               da_old = STARTBLOCKVAL(prev.br_startblock);
+                               if (cur)
+                                       ASSERT(cur->bc_private.b.flags &
+                                               XFS_BTCUR_BPRV_WASDEL);
+                               if (error = xfs_bmap_add_extent_delay_real(ip,
+                                       idx, &cur, new, &da_new, first, flist,
+                                       &logflags, rsvd))
+                                       goto done;
+                       } else if (new->br_state == XFS_EXT_NORM) {
+                               ASSERT(new->br_state == XFS_EXT_NORM);
+                               if (error = xfs_bmap_add_extent_unwritten_real(
+                                       ip, idx, &cur, new, &logflags))
+                                       goto done;
+                       } else {
+                               ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
+                               if (error = xfs_bmap_add_extent_unwritten_real(
+                                       ip, idx, &cur, new, &logflags))
+                                       goto done;
+                       }
+                       ASSERT(*curp == cur || *curp == NULL);
+               }
+               /*
+                * Otherwise we're filling in a hole with an allocation.
+                */
+               else {
+                       if (cur)
+                               ASSERT((cur->bc_private.b.flags &
+                                       XFS_BTCUR_BPRV_WASDEL) == 0);
+                       if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
+                                       new, &logflags, whichfork))
+                               goto done;
+               }
+       }
+
+       ASSERT(*curp == cur || *curp == NULL);
+       /*
+        * Convert to a btree if necessary.
+        */
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
+                       flist, &cur, da_old > 0, &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+       /*
+        * Adjust for changes in reserved delayed indirect blocks.
+        * Nothing to do for disk quotas here.
+        */
+       if (da_old || da_new) {
+               xfs_filblks_t   nblks;
+
+               nblks = da_new;
+               if (cur)
+                       nblks += cur->bc_private.b.allocated;
+               ASSERT(nblks <= da_old);
+               if (nblks < da_old)
+                       xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+                               (int)(da_old - nblks), rsvd);
+       }
+       /*
+        * Clear out the allocated field, done with it now in any case.
+        */
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               *curp = cur;
+       }
+done:
+#ifdef XFSDEBUG
+       if (!error)
+               xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
+#endif
+       *logflagsp = logflags;
+       return error;
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a delayed
+ * allocation to a real allocation.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_delay_real(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to put in extent list */
+       xfs_filblks_t           *dnew,  /* new delayed-alloc indirect blocks */
+       xfs_fsblock_t           *first, /* pointer to firstblock variable */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       int                     *logflagsp, /* inode logging flags */
+       int                     rsvd)   /* OK to use reserved data block allocation */
+{
+       xfs_bmbt_rec_t          *base;  /* base of extent entry list */
+       xfs_btree_cur_t         *cur;   /* btree cursor */
+       int                     diff;   /* temp value */
+       xfs_bmbt_rec_t          *ep;    /* extent entry for idx */
+       int                     error;  /* error return value */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_add_extent_delay_real";
+#endif
+       int                     i;      /* temp state */
+       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
+       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
+                                       /* left is 0, right is 1, prev is 2 */
+       int                     rval;   /* return value (logging flags) */
+       int                     state = 0;/* state bits, accessed thru macros */
+       xfs_filblks_t           temp;   /* value for dnew calculations */
+       xfs_filblks_t           temp2;  /* value for dnew calculations */
+       int                     tmp_rval;       /* partial logging flags */
+       enum {                          /* bit number definitions for state */
+               LEFT_CONTIG,    RIGHT_CONTIG,
+               LEFT_FILLING,   RIGHT_FILLING,
+               LEFT_DELAY,     RIGHT_DELAY,
+               LEFT_VALID,     RIGHT_VALID
+       };
+
+#define        LEFT            r[0]
+#define        RIGHT           r[1]
+#define        PREV            r[2]
+#define        MASK(b)         (1 << (b))
+#define        MASK2(a,b)      (MASK(a) | MASK(b))
+#define        MASK3(a,b,c)    (MASK2(a,b) | MASK(c))
+#define        MASK4(a,b,c,d)  (MASK3(a,b,c) | MASK(d))
+#define        STATE_SET(b,v)  ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define        STATE_TEST(b)   (state & MASK(b))
+#define        STATE_SET_TEST(b,v)     ((v) ? ((state |= MASK(b)), 1) : \
+                                      ((state &= ~MASK(b)), 0))
+#define        SWITCH_STATE            \
+       (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
+
+       /*
+        * Set up a bunch of variables to make the tests simpler.
+        */
+       cur = *curp;
+       base = ip->i_df.if_u1.if_extents;
+       ep = &base[idx];
+       xfs_bmbt_get_all(ep, &PREV);
+       new_endoff = new->br_startoff + new->br_blockcount;
+       ASSERT(PREV.br_startoff <= new->br_startoff);
+       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+       /*
+        * Set flags determining what part of the previous delayed allocation
+        * extent is being replaced by a real allocation.
+        */
+       STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
+       STATE_SET(RIGHT_FILLING,
+               PREV.br_startoff + PREV.br_blockcount == new_endoff);
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        */
+       if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+               xfs_bmbt_get_all(ep - 1, &LEFT);
+               STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
+       }
+       STATE_SET(LEFT_CONTIG, 
+               STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+               LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+               LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+               LEFT.br_state == new->br_state &&
+               LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+       /*
+        * Check and set flags if this segment has a right neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+       if (STATE_SET_TEST(RIGHT_VALID,
+                       idx <
+                       ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+               xfs_bmbt_get_all(ep + 1, &RIGHT);
+               STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
+       }
+       STATE_SET(RIGHT_CONTIG, 
+               STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+               new_endoff == RIGHT.br_startoff &&
+               new->br_startblock + new->br_blockcount ==
+                   RIGHT.br_startblock &&
+               new->br_state == RIGHT.br_state &&
+               new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+               ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
+                 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
+                LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+                    <= MAXEXTLEN));
+       error = 0;
+       /*
+        * Switch out based on the FILLING and CONTIG state bits.
+        */
+       switch (SWITCH_STATE) {
+
+       case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The left and right neighbors are both contiguous with new.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + PREV.br_blockcount +
+                       RIGHT.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               ip->i_d.di_nextents--;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_delete(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       PREV.br_blockcount +
+                                       RIGHT.br_blockcount, LEFT.br_state))
+                               goto done;
+               }
+               *dnew = 0;
+               break;
+
+       case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The left neighbor is contiguous, the right is not.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + PREV.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+                                       LEFT.br_startblock, LEFT.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       PREV.br_blockcount, LEFT.br_state))
+                               goto done;
+               }
+               *dnew = 0;
+               break;
+
+       case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The right neighbor is contiguous, the left is not.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_startblock(ep, new->br_startblock);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount + RIGHT.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+                                       new->br_startblock,
+                                       PREV.br_blockcount +
+                                       RIGHT.br_blockcount, PREV.br_state))
+                               goto done;
+               }
+               *dnew = 0;
+               break;
+
+       case MASK2(LEFT_FILLING, RIGHT_FILLING):
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * Neither the left nor right neighbors are contiguous with
+                * the new one.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_startblock(ep, new->br_startblock);
+               xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 0);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               *dnew = 0;
+               break;
+
+       case MASK2(LEFT_FILLING, LEFT_CONTIG):
+               /*
+                * Filling in the first part of a previous delayed allocation.
+                * The left neighbor is contiguous.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + new->br_blockcount);
+               xfs_bmbt_set_startoff(ep,
+                       PREV.br_startoff + new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               temp = PREV.br_blockcount - new->br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep, temp);
+               ip->i_df.if_lastex = idx - 1;
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+                                       LEFT.br_startblock, LEFT.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       new->br_blockcount,
+                                       LEFT.br_state))
+                               goto done;
+               }
+               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                       STARTBLOCKVAL(PREV.br_startblock));
+               xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+               xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+                       XFS_DATA_FORK);
+               *dnew = temp;
+               break;
+
+       case MASK(LEFT_FILLING):
+               /*
+                * Filling in the first part of a previous delayed allocation.
+                * The left neighbor is not contiguous.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+               xfs_bmbt_set_startoff(ep, new_endoff);
+               temp = PREV.br_blockcount - new->br_blockcount;
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+                       XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 0);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+                                       first, flist, &cur, 1, &tmp_rval,
+                                       XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                       STARTBLOCKVAL(PREV.br_startblock) -
+                       (cur ? cur->bc_private.b.allocated : 0));
+               base = ip->i_df.if_u1.if_extents;
+               ep = &base[idx + 1];
+               xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+               xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
+                       XFS_DATA_FORK);
+               *dnew = temp;
+               break;
+
+       case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+               /*
+                * Filling in the last part of a previous delayed allocation.
+                * The right neighbor is contiguous with the new allocation.
+                */
+               temp = PREV.br_blockcount - new->br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+                       new->br_blockcount + RIGHT.br_blockcount, 
+                       RIGHT.br_state);
+               xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, new->br_startoff,
+                                       new->br_startblock,
+                                       new->br_blockcount +
+                                       RIGHT.br_blockcount,
+                                       RIGHT.br_state))
+                               goto done;
+               }
+               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                       STARTBLOCKVAL(PREV.br_startblock));
+               xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+               xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               *dnew = temp;
+               break;
+
+       case MASK(RIGHT_FILLING):
+               /*
+                * Filling in the last part of a previous delayed allocation.
+                * The right neighbor is not contiguous.
+                */
+               temp = PREV.br_blockcount - new->br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+                       new, NULL, XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 0);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+                               first, flist, &cur, 1, &tmp_rval,
+                               XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                       STARTBLOCKVAL(PREV.br_startblock) -
+                       (cur ? cur->bc_private.b.allocated : 0));
+               base = ip->i_df.if_u1.if_extents;
+               ep = &base[idx];
+               xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+               xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+               *dnew = temp;
+               break;
+
+       case 0:
+               /*
+                * Filling in the middle part of a previous delayed allocation.
+                * Contiguity is impossible here.
+                * This case is avoided almost all the time.
+                */
+               temp = new->br_startoff - PREV.br_startoff;
+               xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep, temp);
+               r[0] = *new;
+               r[1].br_startoff = new_endoff;
+               temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
+               r[1].br_blockcount = temp2;
+               xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+                       XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 0);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+                   ip->i_d.di_nextents > ip->i_df.if_ext_max) {
+                       error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
+                                       first, flist, &cur, 1, &tmp_rval,
+                                       XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               temp = xfs_bmap_worst_indlen(ip, temp);
+               temp2 = xfs_bmap_worst_indlen(ip, temp2);
+               diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) -
+                       (cur ? cur->bc_private.b.allocated : 0));
+               if (diff > 0 &&
+                   xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) {
+                       /*
+                        * Ick gross gag me with a spoon.
+                        */
+                       ASSERT(0);      /* want to see if this ever happens! */
+                       while (diff > 0) {
+                               if (temp) {
+                                       temp--;
+                                       diff--;
+                                       if (!diff ||
+                                           !xfs_mod_incore_sb(ip->i_mount,
+                                                   XFS_SBS_FDBLOCKS, -diff, rsvd))
+                                               break;
+                               }
+                               if (temp2) {
+                                       temp2--;
+                                       diff--;
+                                       if (!diff ||
+                                           !xfs_mod_incore_sb(ip->i_mount,
+                                                   XFS_SBS_FDBLOCKS, -diff, rsvd))
+                                               break;
+                               }
+                       }
+               }
+               base = ip->i_df.if_u1.if_extents;
+               ep = &base[idx];
+               xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+               xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+               xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2));
+               xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
+                       XFS_DATA_FORK);
+               *dnew = temp + temp2;
+               break;
+
+       case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK2(LEFT_FILLING, RIGHT_CONTIG):
+       case MASK2(RIGHT_FILLING, LEFT_CONTIG):
+       case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK(LEFT_CONTIG):
+       case MASK(RIGHT_CONTIG):
+               /*
+                * These cases are all impossible.
+                */
+               ASSERT(0);
+       }
+       *curp = cur;
+done:
+       *logflagsp = rval;
+       return error;
+#undef LEFT
+#undef RIGHT
+#undef PREV
+#undef MASK
+#undef MASK2
+#undef MASK3
+#undef MASK4
+#undef STATE_SET
+#undef STATE_TEST
+#undef STATE_SET_TEST
+#undef SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting an unwritten
+ * allocation to a real allocation or vice versa.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_unwritten_real(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to put in extent list */
+       int                     *logflagsp) /* inode logging flags */
+{
+       xfs_bmbt_rec_t          *base;  /* base of extent entry list */
+       xfs_btree_cur_t         *cur;   /* btree cursor */
+       xfs_bmbt_rec_t          *ep;    /* extent entry for idx */
+       int                     error;  /* error return value */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_add_extent_unwritten_real";
+#endif
+       int                     i;      /* temp state */
+       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
+       xfs_exntst_t            newext; /* new extent state */
+       xfs_exntst_t            oldext; /* old extent state */
+       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
+                                       /* left is 0, right is 1, prev is 2 */
+       int                     rval;   /* return value (logging flags) */
+       int                     state = 0;/* state bits, accessed thru macros */
+       enum {                          /* bit number definitions for state */
+               LEFT_CONTIG,    RIGHT_CONTIG,
+               LEFT_FILLING,   RIGHT_FILLING,
+               LEFT_DELAY,     RIGHT_DELAY,
+               LEFT_VALID,     RIGHT_VALID
+       };
+
+#define        LEFT            r[0]
+#define        RIGHT           r[1]
+#define        PREV            r[2]
+#define        MASK(b)         (1 << (b))
+#define        MASK2(a,b)      (MASK(a) | MASK(b))
+#define        MASK3(a,b,c)    (MASK2(a,b) | MASK(c))
+#define        MASK4(a,b,c,d)  (MASK3(a,b,c) | MASK(d))
+#define        STATE_SET(b,v)  ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define        STATE_TEST(b)   (state & MASK(b))
+#define        STATE_SET_TEST(b,v)     ((v) ? ((state |= MASK(b)), 1) : \
+                                      ((state &= ~MASK(b)), 0))
+#define        SWITCH_STATE            \
+       (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
+
+       /*
+        * Set up a bunch of variables to make the tests simpler.
+        */
+       error = 0;
+       cur = *curp;
+       base = ip->i_df.if_u1.if_extents;
+       ep = &base[idx];
+       xfs_bmbt_get_all(ep, &PREV);
+       newext = new->br_state;
+       oldext = (newext == XFS_EXT_UNWRITTEN) ?
+               XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+       ASSERT(PREV.br_state == oldext);
+       new_endoff = new->br_startoff + new->br_blockcount;
+       ASSERT(PREV.br_startoff <= new->br_startoff);
+       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+       /*
+        * Set flags determining what part of the previous oldext allocation
+        * extent is being replaced by a newext allocation.
+        */
+       STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
+       STATE_SET(RIGHT_FILLING,
+               PREV.br_startoff + PREV.br_blockcount == new_endoff);
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        */
+       if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+               xfs_bmbt_get_all(ep - 1, &LEFT);
+               STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
+       }
+       STATE_SET(LEFT_CONTIG, 
+               STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+               LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+               LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+               LEFT.br_state == newext &&
+               LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+       /*
+        * Check and set flags if this segment has a right neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+       if (STATE_SET_TEST(RIGHT_VALID,
+                       idx <
+                       ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+               xfs_bmbt_get_all(ep + 1, &RIGHT);
+               STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
+       }
+       STATE_SET(RIGHT_CONTIG, 
+               STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+               new_endoff == RIGHT.br_startoff &&
+               new->br_startblock + new->br_blockcount ==
+                   RIGHT.br_startblock &&
+               newext == RIGHT.br_state &&
+               new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+               ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
+                 MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
+                LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+                    <= MAXEXTLEN));
+       /*
+        * Switch out based on the FILLING and CONTIG state bits.
+        */
+       switch (SWITCH_STATE) {
+
+       case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left and right neighbors are both contiguous with new.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + PREV.br_blockcount +
+                       RIGHT.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               ip->i_d.di_nextents -= 2;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_delete(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_delete(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + PREV.br_blockcount +
+                               RIGHT.br_blockcount, LEFT.br_state))
+                               goto done;
+               }
+               break;
+
+       case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left neighbor is contiguous, the right is not.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + PREV.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+               ip->i_d.di_nextents--;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_delete(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + PREV.br_blockcount,
+                               LEFT.br_state))
+                               goto done;
+               }
+               break;
+
+       case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The right neighbor is contiguous, the left is not.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount + RIGHT.br_blockcount);
+               xfs_bmbt_set_state(ep, newext);
+               xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+               ip->i_d.di_nextents--;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_delete(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock,
+                               new->br_blockcount + RIGHT.br_blockcount,
+                               newext))
+                               goto done;
+               }
+               break;
+
+       case MASK2(LEFT_FILLING, RIGHT_FILLING):
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * Neither the left nor right neighbors are contiguous with
+                * the new one.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_state(ep, newext);
+               xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock, new->br_blockcount,
+                               newext))
+                               goto done;
+               }
+               break;
+
+       case MASK2(LEFT_FILLING, LEFT_CONTIG):
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is contiguous.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       LEFT.br_blockcount + new->br_blockcount);
+               xfs_bmbt_set_startoff(ep,
+                       PREV.br_startoff + new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_startblock(ep,
+                       new->br_startblock + new->br_blockcount);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur,
+                               PREV.br_startoff + new->br_blockcount,
+                               PREV.br_startblock + new->br_blockcount,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext))
+                               goto done;
+                       if (error = xfs_bmbt_decrement(cur, 0, &i))
+                               goto done;
+                       if (xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + new->br_blockcount,
+                               LEFT.br_state))
+                               goto done;
+               }
+               break;
+
+       case MASK(LEFT_FILLING):
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is not contiguous.
+                */
+               xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+               ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
+               xfs_bmbt_set_startoff(ep, new_endoff);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               xfs_bmbt_set_startblock(ep,
+                       new->br_startblock + new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+               xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+                       XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur,
+                               PREV.br_startoff + new->br_blockcount,
+                               PREV.br_startblock + new->br_blockcount,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext))
+                               goto done;
+                       cur->bc_rec.b = *new;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               break;
+
+       case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is contiguous with the new allocation.
+                */
+               xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+                       new->br_blockcount + RIGHT.br_blockcount, newext);
+               xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock,
+                                       PREV.br_blockcount, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+                               PREV.br_startblock, 
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext))
+                               goto done;
+                       if (error = xfs_bmbt_increment(cur, 0, &i))
+                               goto done;
+                       if (error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock,
+                               new->br_blockcount + RIGHT.br_blockcount,
+                               newext))
+                               goto done;
+               }
+               break;
+
+       case MASK(RIGHT_FILLING):
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is not contiguous.
+                */
+               xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+               xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+                       new, NULL, XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_update(cur, PREV.br_startoff,
+                               PREV.br_startblock, 
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext))
+                               goto done;
+                       if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 0);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               break;
+
+       case 0:
+               /*
+                * Setting the middle part of a previous oldext extent to
+                * newext.  Contiguity is impossible here.
+                * One extent becomes three extents.
+                */
+               xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep,
+                       new->br_startoff - PREV.br_startoff);
+               xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+               r[0] = *new;
+               r[1].br_startoff = new_endoff;
+               r[1].br_blockcount =
+                       PREV.br_startoff + PREV.br_blockcount - new_endoff;
+               r[1].br_startblock = new->br_startblock + new->br_blockcount;
+               r[1].br_state = oldext;
+               xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+                       XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx + 1;
+               ip->i_d.di_nextents += 2;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if (error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       /* new right extent - oldext */
+                       if (error = xfs_bmbt_update(cur, r[1].br_startoff,
+                               r[1].br_startblock, r[1].br_blockcount,
+                               r[1].br_state))
+                               goto done;
+                       /* new left extent - oldext */
+                       PREV.br_blockcount =
+                               new->br_startoff - PREV.br_startoff;
+                       cur->bc_rec.b = PREV;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       if (error = xfs_bmbt_increment(cur, 0, &i))
+                               goto done;
+                       ASSERT(i == 1);
+                       /* new middle extent - newext */
+                       cur->bc_rec.b = *new;
+                       if (error = xfs_bmbt_insert(cur, &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               break;
+
+       case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK2(LEFT_FILLING, RIGHT_CONTIG):
+       case MASK2(RIGHT_FILLING, LEFT_CONTIG):
+       case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+       case MASK(LEFT_CONTIG):
+       case MASK(RIGHT_CONTIG):
+               /*
+                * These cases are all impossible.
+                */
+               ASSERT(0);
+       }
+       *curp = cur;
+done:
+       *logflagsp = rval;
+       return error;
+#undef LEFT
+#undef RIGHT
+#undef PREV
+#undef MASK
+#undef MASK2
+#undef MASK3
+#undef MASK4
+#undef STATE_SET
+#undef STATE_TEST
+#undef STATE_SET_TEST
+#undef SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a hole
+ * to a delayed allocation.
+ */
+/*ARGSUSED*/
+STATIC int                             /* error */
+xfs_bmap_add_extent_hole_delay(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_btree_cur_t         *cur,   /* if null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to put in extent list */
+       int                     *logflagsp, /* inode logging flags */
+       int                     rsvd)           /* OK to allocate reserved blocks */
+{
+       xfs_bmbt_rec_t          *base;  /* base of extent entry list */
+       xfs_bmbt_rec_t          *ep;    /* extent list entry for idx */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_add_extent_hole_delay";
+#endif
+       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
+       xfs_filblks_t           newlen; /* new indirect size */
+       xfs_filblks_t           oldlen; /* old indirect size */
+       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
+       int                     state;  /* state bits, accessed thru macros */
+       xfs_filblks_t           temp;   /* temp for indirect calculations */
+       enum {                          /* bit number definitions for state */
+               LEFT_CONTIG,    RIGHT_CONTIG,
+               LEFT_DELAY,     RIGHT_DELAY,
+               LEFT_VALID,     RIGHT_VALID
+       };
+
+#define        MASK(b)                 (1 << (b))
+#define        MASK2(a,b)              (MASK(a) | MASK(b))
+#define        STATE_SET(b,v)          ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define        STATE_TEST(b)           (state & MASK(b))
+#define        STATE_SET_TEST(b,v)     ((v) ? ((state |= MASK(b)), 1) : \
+                                      ((state &= ~MASK(b)), 0))
+#define        SWITCH_STATE            (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
+
+       base = ip->i_df.if_u1.if_extents;
+       ep = &base[idx];
+       state = 0;
+       ASSERT(ISNULLSTARTBLOCK(new->br_startblock));
+       /*
+        * Check and set flags if this segment has a left neighbor
+        */
+       if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+               xfs_bmbt_get_all(ep - 1, &left);
+               STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+       }
+       /*
+        * Check and set flags if the current (right) segment exists.
+        * If it doesn't exist, we're converting the hole at end-of-file.
+        */
+       if (STATE_SET_TEST(RIGHT_VALID,
+                          idx <
+                          ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+               xfs_bmbt_get_all(ep, &right);
+               STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+       }
+       /*
+        * Set contiguity flags on the left and right neighbors.
+        * Don't let extents get too large, even if the pieces are contiguous.
+        */
+       STATE_SET(LEFT_CONTIG, 
+               STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) &&
+               left.br_startoff + left.br_blockcount == new->br_startoff &&
+               left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+       STATE_SET(RIGHT_CONTIG,
+               STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) &&
+               new->br_startoff + new->br_blockcount == right.br_startoff &&
+               new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+               (!STATE_TEST(LEFT_CONTIG) ||
+                (left.br_blockcount + new->br_blockcount +
+                    right.br_blockcount <= MAXEXTLEN)));
+       /*
+        * Switch out based on the contiguity flags.
+        */
+       switch (SWITCH_STATE) {
+
+       case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+               /*
+                * New allocation is contiguous with delayed allocations
+                * on the left and on the right.
+                * Merge all three into a single extent list entry.
+                */
+               temp = left.br_blockcount + new->br_blockcount +
+                       right.br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1, temp);
+               oldlen = STARTBLOCKVAL(left.br_startblock) +
+                       STARTBLOCKVAL(new->br_startblock) +
+                       STARTBLOCKVAL(right.br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+               xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
+                       XFS_DATA_FORK);
+               xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               break;
+
+       case MASK(LEFT_CONTIG):
+               /*
+                * New allocation is contiguous with a delayed allocation
+                * on the left.
+                * Merge the new allocation with the left neighbor.
+                */
+               temp = left.br_blockcount + new->br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               xfs_bmbt_set_blockcount(ep - 1, temp);
+               oldlen = STARTBLOCKVAL(left.br_startblock) +
+                       STARTBLOCKVAL(new->br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+               xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+                       XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx - 1;
+               break;
+
+       case MASK(RIGHT_CONTIG):
+               /*
+                * New allocation is contiguous with a delayed allocation
+                * on the right.
+                * Merge the new allocation with the right neighbor.
+                */
+               xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+               temp = new->br_blockcount + right.br_blockcount;
+               oldlen = STARTBLOCKVAL(new->br_startblock) +
+                       STARTBLOCKVAL(right.br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_allf(ep, new->br_startoff,
+                       NULLSTARTBLOCK((int)newlen), temp, right.br_state); 
+               xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               break;
+
+       case 0:
+               /*
+                * New allocation is not contiguous with another
+                * delayed allocation.
+                * Insert a new entry.
+                */
+               oldlen = newlen = 0;
+               xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+                       XFS_DATA_FORK);
+               xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+               ip->i_df.if_lastex = idx;
+               break;
+       }
+       if (oldlen != newlen) {
+               ASSERT(oldlen > newlen);
+               xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+                       (int)(oldlen - newlen), rsvd);
+               /*
+                * Nothing to do for disk quota accounting here.
+                */
+       }
+       *logflagsp = 0;
+       return 0;
+#undef MASK
+#undef MASK2
+#undef STATE_SET
+#undef STATE_TEST
+#undef STATE_SET_TEST
+#undef SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a hole
+ * to a real allocation.
+ *
+ * The new extent is compared against its in-core neighbors at idx - 1
+ * (left) and idx (right).  If it abuts a real (non-delayed) neighbor in
+ * file offset and start block, has the same state, and the merged length
+ * stays within MAXEXTLEN, it is merged into that neighbor; otherwise a
+ * new entry is inserted at idx.  When cur is non-null the same change is
+ * also applied to the bmap btree through the cursor.
+ *
+ * *logflagsp is set to the inode logging flags for the caller:
+ * XFS_ILOG_CORE whenever the fork's extent count changes, plus
+ * XFS_ILOG_FEXT(whichfork) when there is no btree cursor.
+ * Returns 0 on success, or the error from a failed btree operation.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_hole_real(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            idx,    /* extent number to update/insert */
+       xfs_btree_cur_t         *cur,   /* if null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to put in extent list */
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork) /* data or attr fork */
+{
+       xfs_bmbt_rec_t          *ep;    /* pointer to extent entry ins. point */
+       int                     error;  /* error return value */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_add_extent_hole_real";
+#endif
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
+       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
+       int                     state;  /* state bits, accessed thru macros */
+       enum {                          /* bit number definitions for state */
+               LEFT_CONTIG,    RIGHT_CONTIG,
+               LEFT_DELAY,     RIGHT_DELAY,
+               LEFT_VALID,     RIGHT_VALID
+       };
+
+#define        MASK(b)                 (1 << (b))
+#define        MASK2(a,b)              (MASK(a) | MASK(b))
+#define        STATE_SET(b,v)          ((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define        STATE_TEST(b)           (state & MASK(b))
+#define        STATE_SET_TEST(b,v)     ((v) ? ((state |= MASK(b)), 1) : \
+                                      ((state &= ~MASK(b)), 0))
+#define        SWITCH_STATE            (state & MASK2(LEFT_CONTIG, RIGHT_CONTIG)) /* contiguity bits only */
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+       ep = &ifp->if_u1.if_extents[idx];
+       state = 0;
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        * "left" is filled in only when LEFT_VALID is set; the contiguity
+        * tests below check LEFT_VALID first, so it is never read otherwise.
+        */
+       if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+               xfs_bmbt_get_all(ep - 1, &left);
+               STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+       }
+       /*
+        * Check and set flags if this segment has a current value.
+        * Not true if we're inserting into the "hole" at eof.
+        * As with "left", "right" is filled in only when RIGHT_VALID is set.
+        */
+       if (STATE_SET_TEST(RIGHT_VALID,
+                          idx <
+                          ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+               xfs_bmbt_get_all(ep, &right);
+               STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+       }
+       /*
+        * We're inserting a real allocation between "left" and "right".
+        * Set the contiguity flags.  Don't let extents get too large.
+        * A neighbor counts as contiguous only if it is valid, not a
+        * delayed allocation, abuts "new" in both file offset and start
+        * block, has the same state, and the merged length fits in
+        * MAXEXTLEN.  RIGHT_CONTIG additionally requires that, when
+        * LEFT_CONTIG is already set, all three pieces together still
+        * fit in MAXEXTLEN.
+        */
+       STATE_SET(LEFT_CONTIG, 
+               STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+               left.br_startoff + left.br_blockcount == new->br_startoff &&
+               left.br_startblock + left.br_blockcount == new->br_startblock &&
+               left.br_state == new->br_state &&
+               left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+       STATE_SET(RIGHT_CONTIG,
+               STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+               new->br_startoff + new->br_blockcount == right.br_startoff &&
+               new->br_startblock + new->br_blockcount ==
+                   right.br_startblock &&
+               new->br_state == right.br_state &&
+               new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+               (!STATE_TEST(LEFT_CONTIG) ||
+                left.br_blockcount + new->br_blockcount +
+                    right.br_blockcount <= MAXEXTLEN));
+
+       /*
+        * Select which case we're in here, and implement it.
+        * Every case returns from inside the switch.
+        */
+       switch (SWITCH_STATE) {
+
+       case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+               /*
+                * New allocation is contiguous with real allocations on the
+                * left and on the right.
+                * Merge all three into a single extent list entry.
+                * The left entry takes the combined length and the entry
+                * at idx is removed, so the fork's extent count drops by
+                * one.  With a btree cursor, the right record is looked up
+                * and deleted, and after xfs_bmbt_decrement the record at
+                * the cursor is rewritten with left's start offset, start
+                * block and state and the combined length.
+                */
+               xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+                       whichfork);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       left.br_blockcount + new->br_blockcount +
+                       right.br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+                       whichfork);
+               xfs_bmap_trace_delete(fname, "LC|RC", ip,
+                       idx, 1, whichfork);
+               xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+               ifp->if_lastex = idx - 1;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                       XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+               if (cur == NULL) {
+                       *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+                       return 0;
+               }
+               *logflagsp = XFS_ILOG_CORE;
+               if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+                               right.br_startblock, right.br_blockcount, &i))
+                       return error;
+               ASSERT(i == 1);
+               if (error = xfs_bmbt_delete(cur, 0, &i))
+                       return error;
+               ASSERT(i == 1);
+               if (error = xfs_bmbt_decrement(cur, 0, &i))
+                       return error;
+               ASSERT(i == 1);
+               error = xfs_bmbt_update(cur, left.br_startoff,
+                               left.br_startblock,
+                               left.br_blockcount + new->br_blockcount +
+                               right.br_blockcount, left.br_state);
+               return error;
+
+       case MASK(LEFT_CONTIG):
+               /*
+                * New allocation is contiguous with a real allocation
+                * on the left.
+                * Merge the new allocation with the left neighbor.
+                * Only the left entry's block count changes; the extent
+                * count is unchanged, so XFS_ILOG_CORE is not requested.
+                */
+               xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+               xfs_bmbt_set_blockcount(ep - 1,
+                       left.br_blockcount + new->br_blockcount);
+               xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+               ifp->if_lastex = idx - 1;
+               if (cur == NULL) {
+                       *logflagsp = XFS_ILOG_FEXT(whichfork);
+                       return 0;
+               }
+               *logflagsp = 0;
+               if (error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
+                               left.br_startblock, left.br_blockcount, &i))
+                       return error;
+               ASSERT(i == 1);
+               error = xfs_bmbt_update(cur, left.br_startoff,
+                               left.br_startblock,
+                               left.br_blockcount + new->br_blockcount,
+                               left.br_state);
+               return error;
+
+       case MASK(RIGHT_CONTIG):
+               /*
+                * New allocation is contiguous with a real allocation
+                * on the right.
+                * Merge the new allocation with the right neighbor.
+                * The entry at idx is rewritten to start at new's offset
+                * and block, with the combined length and right's state;
+                * the extent count is unchanged.
+                */
+               xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+               xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
+                       new->br_blockcount + right.br_blockcount,
+                       right.br_state);
+               xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+               ifp->if_lastex = idx;
+               if (cur == NULL) {
+                       *logflagsp = XFS_ILOG_FEXT(whichfork);
+                       return 0;
+               }
+               *logflagsp = 0;
+               if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+                               right.br_startblock, right.br_blockcount, &i))
+                       return error;
+               ASSERT(i == 1);
+               error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock,
+                               new->br_blockcount + right.br_blockcount,
+                               right.br_state);
+               return error;
+
+       case 0:
+               /*
+                * New allocation is not contiguous with another
+                * real allocation.
+                * Insert a new entry.
+                * The extent count grows by one.  With a btree cursor, the
+                * lookup is expected to find no matching record (i == 0)
+                * before the insert.  The state is stored in the cursor
+                * record directly because xfs_bmbt_lookup_eq is not passed
+                * it (presumably the lookup filled in the other fields of
+                * cur->bc_rec.b -- TODO confirm).
+                */
+               xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+                       whichfork);
+               xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork);
+               ifp->if_lastex = idx;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                       XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+               if (cur == NULL) {
+                       *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+                       return 0;
+               }
+               *logflagsp = XFS_ILOG_CORE;
+               if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                               new->br_startblock, new->br_blockcount, &i))
+                       return error;
+               ASSERT(i == 0);
+               cur->bc_rec.b.br_state = new->br_state;
+               if (error = xfs_bmbt_insert(cur, &i))
+                       return error;
+               ASSERT(i == 1);
+               return 0;
+       }
+#undef MASK
+#undef MASK2
+#undef STATE_SET
+#undef STATE_TEST
+#undef STATE_SET_TEST
+#undef SWITCH_STATE
+       /* NOTREACHED: every case of the switch above returns */
+       ASSERT(0);
+       return 0; /* keep gcc quiet */
+}
+
+#define XFS_ALLOC_GAP_UNITS    4       /* xfs_bmap_alloc: a gap to a neighbor extent above this many times the allocation length counts as "large" */
+
+/*
+ * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
+ * It figures out where to ask the underlying allocator to put the new extent.
+ */
+STATIC int                             /* error */
+xfs_bmap_alloc(
+       xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
+{
+       xfs_fsblock_t   adjust;         /* adjustment to block numbers */
+       xfs_alloctype_t atype;          /* type for allocation routines */
+       int             error;          /* error return value */
+       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
+       xfs_mount_t     *mp;            /* mount point structure */
+       int             nullfb;         /* true if ap->firstblock isn't set */
+       int             rt;             /* true if inode is realtime */
+#ifdef __KERNEL__
+       xfs_extlen_t    prod;           /* product factor for allocators */
+       xfs_extlen_t    ralen;          /* realtime allocation length */
+#endif
+
+#define        ISLEGAL(x,y)    \
+       (rt ? \
+               (x) < mp->m_sb.sb_rblocks : \
+               XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
+               XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
+               XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
+
+       /*
+        * Set up variables.
+        */
+       mp = ap->ip->i_mount;
+       nullfb = ap->firstblock == NULLFSBLOCK;
+       rt = (ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata;
+       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+#ifdef __KERNEL__
+       if (rt) {
+               xfs_extlen_t    extsz;          /* file extent size for rt */
+               xfs_fileoff_t   nexto;          /* next file offset */
+               xfs_extlen_t    orig_alen;      /* original ap->alen */
+               xfs_fileoff_t   orig_end;       /* original off+len */
+               xfs_fileoff_t   orig_off;       /* original ap->off */
+               xfs_extlen_t    mod_off;        /* modulus calculations */
+               xfs_fileoff_t   prevo;          /* previous file offset */
+               xfs_rtblock_t   rtx;            /* realtime extent number */
+               xfs_extlen_t    temp;           /* temp for rt calculations */
+
+               /*
+                * Set prod to match the realtime extent size.
+                */
+               if (!(extsz = ap->ip->i_d.di_extsize))
+                       extsz = mp->m_sb.sb_rextsize;
+               prod = extsz / mp->m_sb.sb_rextsize;
+               orig_off = ap->off;
+               orig_alen = ap->alen;
+               orig_end = orig_off + orig_alen;
+               /*
+                * If the file offset is unaligned vs. the extent size
+                * we need to align it.  This will be possible unless
+                * the file was previously written with a kernel that didn't
+                * perform this alignment.
+                */
+               mod_off = do_mod(orig_off, extsz);
+               if (mod_off) {
+                       ap->alen += mod_off;
+                       ap->off -= mod_off;
+               }
+               /*
+                * Same adjustment for the end of the requested area.
+                */
+               if (temp = (ap->alen % extsz))
+                       ap->alen += extsz - temp;
+               /*
+                * If the previous block overlaps with this proposed allocation
+                * then move the start forward without adjusting the length.
+                */
+               prevo =
+                       ap->prevp->br_startoff == NULLFILEOFF ?
+                               0 :
+                               (ap->prevp->br_startoff +
+                                ap->prevp->br_blockcount);
+               if (ap->off != orig_off && ap->off < prevo)
+                       ap->off = prevo;
+               /*
+                * If the next block overlaps with this proposed allocation
+                * then move the start back without adjusting the length,
+                * but not before offset 0.
+                * This may of course make the start overlap previous block,
+                * and if we hit the offset 0 limit then the next block
+                * can still overlap too.
+                */
+               nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ? 
+                       NULLFILEOFF : ap->gotp->br_startoff;
+               if (!ap->eof &&
+                   ap->off + ap->alen != orig_end &&
+                   ap->off + ap->alen > nexto)
+                       ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
+               /*
+                * If we're now overlapping the next or previous extent that
+                * means we can't fit an extsz piece in this hole.  Just move
+                * the start forward to the first legal spot and set
+                * the length so we hit the end.
+                */
+               if ((ap->off != orig_off && ap->off < prevo) ||
+                   (ap->off + ap->alen != orig_end &&
+                    ap->off + ap->alen > nexto)) {
+                       ap->off = prevo;
+                       ap->alen = nexto - prevo;
+               }
+               /*
+                * If the result isn't a multiple of rtextents we need to
+                * remove blocks until it is.
+                */
+               if (temp = (ap->alen % mp->m_sb.sb_rextsize)) {
+                       /*
+                        * We're not covering the original request, or
+                        * we won't be able to once we fix the length.
+                        */
+                       if (orig_off < ap->off ||
+                           orig_end > ap->off + ap->alen ||
+                           ap->alen - temp < orig_alen)
+                               return XFS_ERROR(EINVAL);
+                       /*
+                        * Try to fix it by moving the start up.
+                        */
+                       if (ap->off + temp <= orig_off) {
+                               ap->alen -= temp;
+                               ap->off += temp;
+                       }
+                       /*
+                        * Try to fix it by moving the end in.
+                        */
+                       else if (ap->off + ap->alen - temp >= orig_end)
+                               ap->alen -= temp;
+                       /*
+                        * Set the start to the minimum then trim the length.
+                        */
+                       else {
+                               ap->alen -= orig_off - ap->off;
+                               ap->off = orig_off;
+                               ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
+                       }
+                       /*
+                        * Result doesn't cover the request, fail it.
+                        */
+                       if (orig_off < ap->off || orig_end > ap->off + ap->alen)
+                               return XFS_ERROR(EINVAL);
+               }
+               ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+               /*
+                * If the offset & length are not perfectly aligned
+                * then kill prod, it will just get us in trouble.
+                */
+               if (do_mod(ap->off, extsz) || ap->alen % extsz)
+                       prod = 1;
+               /*
+                * Set ralen to be the actual requested length in rtextents.
+                */
+               ralen = ap->alen / mp->m_sb.sb_rextsize;
+               /*
+                * If the old value was close enough to MAXEXTLEN that
+                * we rounded up to it, cut it back so it's legal again.
+                * Note that if it's a really large request (bigger than
+                * MAXEXTLEN), we don't hear about that number, and can't
+                * adjust the starting point to match it.
+                */
+               if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
+                       ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
+               /*
+                * If it's an allocation to an empty file at offset 0,
+                * pick an extent that will space things out in the rt area.
+                */
+               if (ap->eof && ap->off == 0) {
+                       error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
+                       if (error)
+                               return error;
+                       ap->rval = rtx * mp->m_sb.sb_rextsize;
+               } else
+                       ap->rval = 0;
+       }
+#else
+       if (rt)
+               ap->rval = 0;
+#endif /* __KERNEL__ */
+       else if (nullfb)
+               ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+       else
+               ap->rval = ap->firstblock;
+       /*
+        * If allocating at eof, and there's a previous real block,
+        * try to use its last block as our starting point.
+        */
+       if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
+           !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
+           ISLEGAL(ap->prevp->br_startblock + ap->prevp->br_blockcount,
+                   ap->prevp->br_startblock)) {
+               ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
+               /*
+                * Adjust for the gap between prevp and us.
+                */
+               adjust = ap->off -
+                       (ap->prevp->br_startoff + ap->prevp->br_blockcount);
+               if (adjust &&
+                   ISLEGAL(ap->rval + adjust, ap->prevp->br_startblock))
+                       ap->rval += adjust;
+       }
+       /*
+        * If not at eof, then compare the two neighbor blocks.
+        * Figure out whether either one gives us a good starting point,
+        * and pick the better one.
+        */
+       else if (!ap->eof) {
+               xfs_fsblock_t   gotbno;         /* right side block number */
+               xfs_fsblock_t   gotdiff;        /* right side difference */
+               xfs_fsblock_t   prevbno;        /* left side block number */
+               xfs_fsblock_t   prevdiff;       /* left side difference */
+
+               /*
+                * If there's a previous (left) block, select a requested
+                * start block based on it.
+                */
+               if (ap->prevp->br_startoff != NULLFILEOFF &&
+                   !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
+                   (prevbno = ap->prevp->br_startblock +
+                              ap->prevp->br_blockcount) &&
+                   ISLEGAL(prevbno, ap->prevp->br_startblock)) {
+                       /*
+                        * Calculate gap to end of previous block.
+                        */
+                       adjust = prevdiff = ap->off -
+                               (ap->prevp->br_startoff +
+                                ap->prevp->br_blockcount);
+                       /*
+                        * Figure the startblock based on the previous block's
+                        * end and the gap size.
+                        * Heuristic!
+                        * If the gap is large relative to the piece we're
+                        * allocating, or using it gives us an illegal block
+                        * number, then just use the end of the previous block.
+                        */
+                       if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+                           ISLEGAL(prevbno + prevdiff,
+                                   ap->prevp->br_startblock))
+                               prevbno += adjust;
+                       else
+                               prevdiff += adjust;
+                       /*
+                        * If the firstblock forbids it, can't use it, 
+                        * must use default.
+                        */
+                       if (!rt && !nullfb &&
+                           XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
+                               prevbno = NULLFSBLOCK;
+               }
+               /*
+                * No previous block or can't follow it, just default.
+                */
+               else
+                       prevbno = NULLFSBLOCK;
+               /*
+                * If there's a following (right) block, select a requested
+                * start block based on it.
+                */
+               if (!ISNULLSTARTBLOCK(ap->gotp->br_startblock)) {
+                       /*
+                        * Calculate gap to start of next block.
+                        */
+                       adjust = gotdiff = ap->gotp->br_startoff - ap->off;
+                       /*
+                        * Figure the startblock based on the next block's
+                        * start and the gap size.
+                        */
+                       gotbno = ap->gotp->br_startblock;
+                       /*
+                        * Heuristic!
+                        * If the gap is large relative to the piece we're
+                        * allocating, or using it gives us an illegal block
+                        * number, then just use the start of the next block
+                        * offset by our length.
+                        */
+                       if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+                           ISLEGAL(gotbno - gotdiff, gotbno))
+                               gotbno -= adjust;
+                       else if (ISLEGAL(gotbno - ap->alen, gotbno)) {
+                               gotbno -= ap->alen;
+                               gotdiff += adjust - ap->alen;
+                       } else
+                               gotdiff += adjust;
+                       /*
+                        * If the firstblock forbids it, can't use it, 
+                        * must use default.
+                        */
+                       if (!rt && !nullfb &&
+                           XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
+                               gotbno = NULLFSBLOCK;
+               }
+               /*
+                * No next block, just default.
+                */
+               else
+                       gotbno = NULLFSBLOCK;
+               /*
+                * If both valid, pick the better one, else the only good
+                * one, else ap->rval is already set (to 0 or the inode block).
+                */
+               if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
+                       ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
+               else if (prevbno != NULLFSBLOCK)
+                       ap->rval = prevbno;
+               else if (gotbno != NULLFSBLOCK)
+                       ap->rval = gotbno;
+       }
+       /*
+        * If allowed, use ap->rval; otherwise must use firstblock since
+        * it's in the right allocation group.
+        */
+       if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
+               ;
+       else
+               ap->rval = ap->firstblock;
+       /*
+        * Realtime allocation, done through xfs_rtallocate_extent.
+        */
+       if (rt) {
+#ifndef __KERNEL__
+               ASSERT(0);
+#else
+               xfs_rtblock_t   rtb;
+
+               atype = ap->rval == 0 ?
+                       XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
+               do_div(ap->rval, mp->m_sb.sb_rextsize);
+               rtb = ap->rval;
+               ap->alen = ralen;
+               if (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
+                               &ralen, atype, ap->wasdel, prod, &rtb))
+                       return error;
+               if (rtb == NULLFSBLOCK && prod > 1 &&
+                   (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
+                                                  ap->alen, &ralen, atype,
+                                                  ap->wasdel, 1, &rtb)))
+                       return error;
+               ap->rval = rtb;
+               if (ap->rval != NULLFSBLOCK) {
+                       ap->rval *= mp->m_sb.sb_rextsize;
+                       ralen *= mp->m_sb.sb_rextsize;
+                       ap->alen = ralen;
+                       ap->ip->i_d.di_nblocks += ralen;
+                       xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+                       if (ap->wasdel)
+                               ap->ip->i_delayed_blks -= ralen;
+                       /*
+                        * Adjust the disk quota also. This was reserved 
+                        * earlier.
+                        */
+                       if (XFS_IS_QUOTA_ON(mp) &&
+                           ap->ip->i_ino != mp->m_sb.sb_uquotino &&
+                           ap->ip->i_ino != mp->m_sb.sb_pquotino)
+                               xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+                                       ap->wasdel ?
+                                               XFS_TRANS_DQ_DELRTBCOUNT :
+                                               XFS_TRANS_DQ_RTBCOUNT,
+                                       (long)ralen);
+               } else
+                       ap->alen = 0;
+#endif /* __KERNEL__ */
+       }
+       /*
+        * Normal allocation, done through xfs_alloc_vextent.
+        */
+       else {
+               xfs_agnumber_t  ag;
+               xfs_alloc_arg_t args;
+               xfs_extlen_t    blen;
+               xfs_extlen_t    delta;
+               int             isaligned;
+               xfs_extlen_t    longest;
+               xfs_extlen_t    need;
+               xfs_extlen_t    nextminlen;
+               int             notinit;
+               xfs_perag_t     *pag;
+               xfs_agnumber_t  startag;
+               int             tryagain;
+
+               tryagain = isaligned = 0;
+               args.tp = ap->tp;
+               args.mp = mp;
+               args.fsbno = ap->rval;
+               args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+               blen = 0;
+               if (nullfb) {
+                       args.type = XFS_ALLOCTYPE_START_BNO;
+                       args.total = ap->total;
+                       /*
+                        * Find the longest available space.
+                        * We're going to try for the whole allocation at once.
+                        */
+                       startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
+                       notinit = 0;
+                       mrlock(&mp->m_peraglock, MR_ACCESS, PINOD);
+                       while (blen < ap->alen) {
+                               pag = &mp->m_perag[ag];
+                               if (!pag->pagf_init &&
+                                   (error = xfs_alloc_pagf_init(mp, args.tp,
+                                           ag, XFS_ALLOC_FLAG_TRYLOCK))) {
+                                       mrunlock(&mp->m_peraglock);
+                                       return error;
+                               }
+                               /*
+                                * See xfs_alloc_fix_freelist...
+                                */
+                               if (pag->pagf_init) {
+                                       need = XFS_MIN_FREELIST_PAG(pag, mp);
+                                       delta = need > pag->pagf_flcount ?
+                                               need - pag->pagf_flcount : 0;
+                                       longest = (pag->pagf_longest > delta) ?
+                                               (pag->pagf_longest - delta) :
+                                               (pag->pagf_flcount > 0 ||
+                                                pag->pagf_longest > 0);
+                                       if (blen < longest)
+                                               blen = longest;
+                               } else
+                                       notinit = 1;
+                               if (++ag == mp->m_sb.sb_agcount) 
+                                       ag = 0;
+                               if (ag == startag)
+                                       break;
+                       }
+                       mrunlock(&mp->m_peraglock);
+                       /* 
+                        * Since the above loop did a BUF_TRYLOCK, it is
+                        * possible that there is space for this request.
+                        */ 
+                       if (notinit || blen < ap->minlen)
+                               args.minlen = ap->minlen;       
+                       /*
+                        * If the best seen length is less than the request
+                        * length, use the best as the minimum.
+                        */
+                       else if (blen < ap->alen)
+                               args.minlen = blen;
+                       /*
+                        * Otherwise we've seen an extent as big as alen,
+                        * use that as the minimum.
+                        */
+                       else 
+                               args.minlen = ap->alen;
+               } else if (ap->low) {
+                       args.type = XFS_ALLOCTYPE_FIRST_AG;
+                       args.total = args.minlen = ap->minlen;
+               } else {
+                       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+                       args.total = ap->total;
+                       args.minlen = ap->minlen;
+               }
+               if (ap->ip->i_d.di_extsize) {
+                       args.prod = ap->ip->i_d.di_extsize;
+                       if (args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod))
+                               args.mod = (xfs_extlen_t)(args.prod - args.mod);
+               } else if (mp->m_sb.sb_blocksize >= NBPP) {
+                       args.prod = 1;
+                       args.mod = 0;
+               } else {
+                       args.prod = NBPP >> mp->m_sb.sb_blocklog;
+                       if (args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod)))
+                               args.mod = (xfs_extlen_t)(args.prod - args.mod);
+               }
+               /*
+                * If we are not low on available data blocks, and the 
+                * underlying logical volume manager is a stripe, and
+                * the file offset is zero then try to allocate data 
+                * blocks on stripe unit boundary.
+                * NOTE: ap->aeof is only set if the allocation length
+                * is >= the stripe unit and the allocation offset is
+                * at the end of file. 
+                */ 
+               if (!ap->low && ap->aeof) {
+                       if (!ap->off) {
+                               args.alignment = mp->m_dalign;
+                               atype = args.type;
+                               isaligned = 1;
+                               /*
+                                * Adjust for alignment
+                                */
+                               if (blen > args.alignment && blen <= ap->alen) 
+                                       args.minlen = blen - args.alignment;
+                               args.minalignslop = 0;
+                       } else {
+                               /*
+                                * First try an exact bno allocation.
+                                * If it fails then do a near or start bno
+                                * allocation with alignment turned on.
+                                */
+                               atype = args.type;
+                               tryagain = 1;
+                               args.type = XFS_ALLOCTYPE_THIS_BNO;
+                               args.alignment = 1;
+                               /*
+                                * Compute the minlen+alignment for the
+                                * next case.  Set slop so that the value
+                                * of minlen+alignment+slop doesn't go up
+                                * between the calls.
+                                */
+                               if (blen > mp->m_dalign && blen <= ap->alen) 
+                                       nextminlen = blen - mp->m_dalign;
+                               else
+                                       nextminlen = args.minlen;
+                               if (nextminlen + mp->m_dalign > args.minlen + 1)
+                                       args.minalignslop =
+                                               nextminlen + mp->m_dalign -
+                                               args.minlen - 1;
+                               else
+                                       args.minalignslop = 0;
+                       }
+               } else {
+                       args.alignment = 1;
+                       args.minalignslop = 0;
+               }
+               args.minleft = ap->minleft;
+               args.wasdel = ap->wasdel;
+               args.isfl = 0;
+               args.userdata = ap->userdata;
+               if (error = xfs_alloc_vextent(&args))
+                       return error;
+               if (tryagain && args.fsbno == NULLFSBLOCK) {
+                       /*
+                        * Exact allocation failed. Now try with alignment
+                        * turned on.
+                        */
+                        args.type = atype;
+                        args.fsbno = ap->rval;
+                        args.alignment = mp->m_dalign;
+                       args.minlen = nextminlen;
+                       args.minalignslop = 0;
+                       isaligned = 1;
+                        if (error = xfs_alloc_vextent(&args))
+                                return error;
+                }
+               if (isaligned && args.fsbno == NULLFSBLOCK) {
+                       /* 
+                        * allocation failed, so turn off alignment and
+                        * try again.
+                        */
+                       args.type = atype;
+                       args.fsbno = ap->rval;
+                       args.alignment = 0;
+                       if (error = xfs_alloc_vextent(&args))
+                               return error;
+               }
+               if (args.fsbno == NULLFSBLOCK && nullfb &&
+                   args.minlen > ap->minlen) {
+                       args.minlen = ap->minlen;
+                       args.type = XFS_ALLOCTYPE_START_BNO;
+                       args.fsbno = ap->rval;
+                       if (error = xfs_alloc_vextent(&args))
+                               return error;
+               }
+               if (args.fsbno == NULLFSBLOCK && nullfb) {
+                       args.fsbno = 0;
+                       args.type = XFS_ALLOCTYPE_FIRST_AG;
+                       args.total = ap->minlen;
+                       args.minleft = 0;
+                       if (error = xfs_alloc_vextent(&args))
+                               return error;
+                       ap->low = 1;
+               }
+               if (args.fsbno != NULLFSBLOCK) {
+                       ap->firstblock = ap->rval = args.fsbno;
+                       ASSERT(nullfb || fb_agno == args.agno ||
+                              (ap->low && fb_agno < args.agno));
+                       ap->alen = args.len;
+                       ap->ip->i_d.di_nblocks += args.len;
+                       xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+                       if (ap->wasdel)
+                               ap->ip->i_delayed_blks -= args.len;
+                       /*
+                        * Adjust the disk quota also. This was reserved 
+                        * earlier.
+                        */
+                       if (XFS_IS_QUOTA_ON(mp) &&
+                           ap->ip->i_ino != mp->m_sb.sb_uquotino &&
+                           ap->ip->i_ino != mp->m_sb.sb_pquotino)
+                               xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+                                       ap->wasdel ?
+                                               XFS_TRANS_DQ_DELBCOUNT :
+                                               XFS_TRANS_DQ_BCOUNT,
+                                       (long)args.len);
+               } else {
+                       ap->rval = NULLFSBLOCK;
+                       ap->alen = 0;
+               }
+       }
+       return 0;
+#undef ISLEGAL
+}
+
+/*
+ * Transform a btree format file with only one leaf node, where the
+ * extents list will fit in the inode, into an extents format file.
+ * Since the extent list is already in-core, all we have to do is
+ * give up the space for the btree root and pitch the leaf block.
+ *
+ * The single child block named by the root's only pointer is read and
+ * checked, added to the cursor's free list and invalidated in the
+ * transaction.  di_nblocks is decremented by one, as is the block quota
+ * when quotas are on and this is not one of the quota inodes.  Unless
+ * "async" is set, the transaction is marked synchronous.
+ *
+ * On success returns 0 with *logflagsp set to
+ * XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork).  If reading or checking the
+ * child block fails, that error is returned and *logflagsp is left at 0.
+ */
+STATIC int                             /* error */
+xfs_bmap_btree_to_extents(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork,  /* data or attr fork */
+       int                     async)      /* xaction can be async */
+{
+       /* REFERENCED */
+       xfs_bmbt_block_t        *cblock;/* child btree block */
+       xfs_fsblock_t           cbno;   /* child block number */
+       xfs_buf_t                       *cbp;   /* child block's buffer */
+       int                     error;  /* error return value */
+       xfs_ifork_t             *ifp;   /* inode fork data */
+       xfs_mount_t             *mp;    /* mount point structure */
+       xfs_bmbt_ptr_t          *pp;    /* ptr to block address */
+       xfs_bmbt_block_t        *rblock;/* root btree block */
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+       rblock = ifp->if_broot;
+       ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) == 1);
+       ASSERT(INT_GET(rblock->bb_numrecs, ARCH_CONVERT) == 1);
+       ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
+       mp = ip->i_mount;
+       pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
+       *logflagsp = 0;         /* nothing to log if we fail below */
+#ifdef DEBUG
+       if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1))
+               return error;
+#endif
+       cbno = INT_GET(*pp, ARCH_CONVERT);
+       if (error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
+                       XFS_BMAP_BTREE_REF))
+               return error;
+       cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
+       if (error = xfs_btree_check_lblock(cur, cblock, 0, cbp))
+               return error;
+       xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); /* one block, on the cursor's free list */
+       if (!async)
+               xfs_trans_set_sync(tp);
+       ip->i_d.di_nblocks--;   /* the child block no longer belongs to the file */
+       if (XFS_IS_QUOTA_ON(mp) &&
+           ip->i_ino != mp->m_sb.sb_uquotino &&
+           ip->i_ino != mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+       xfs_trans_binval(tp, cbp);
+       if (cur->bc_bufs[0] == cbp)     /* don't leave the cursor pointing at the invalidated buffer */
+               cur->bc_bufs[0] = NULL;
+       xfs_iroot_realloc(ip, -1, whichfork);   /* give up the one root record; asserted below to leave no root */
+       ASSERT(ifp->if_broot == NULL);
+       ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+       *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+       return 0;
+}
+
+/*
+ * Called by xfs_bmapi to update extent list structure and the btree
+ * after removing space (or undoing a delayed allocation).
+ *
+ * "del" must lie entirely within the extent at index "idx" of the fork's
+ * in-core extent list, and must be delayed exactly when that extent is.
+ * Depending on whether del touches the start and/or the end of that
+ * extent, the extent is deleted, trimmed at the front, trimmed at the
+ * back, or split in two.
+ *
+ * For a real extent the blocks are freed: realtime data extents are
+ * freed here via xfs_rtfree_extent, anything else is added to "flist".
+ * di_nblocks and the matching quota count are reduced, and the bmap
+ * btree is updated through "cur" when it is non-null.  For a delayed
+ * extent the indirect-block reservation of the remaining piece(s) is
+ * recomputed and any surplus over the old reservation is added to the
+ * in-core superblock's XFS_SBS_FDBLOCKS count.
+ *
+ * Returns 0 or an error from a helper, with the inode logging flags in
+ * *logflagsp.  If the btree insert for a split returns ENOSPC, the btree
+ * record and the in-core extent are put back, *logflagsp is 0 and
+ * ENOSPC is returned.
+ */
+STATIC int                             /* error */
+xfs_bmap_del_extent(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_trans_t             *tp,    /* current transaction pointer */
+       xfs_extnum_t            idx,    /* extent number to update/delete */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       xfs_btree_cur_t         *cur,   /* if null, not a btree */
+       xfs_bmbt_irec_t         *del,   /* data to remove from extent list */
+       int                     iflags, /* input flags */           
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork, /* data or attr fork */
+       int                     rsvd)   /* OK to allocate reserved blocks */
+{
+       xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
+       xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
+       xfs_fsblock_t           del_endblock;   /* first block past del */
+       xfs_fileoff_t           del_endoff;     /* first offset past del */
+       int                     delay;  /* current block is delayed allocated */
+       int                     do_fx;  /* free extent at end of routine */
+       xfs_bmbt_rec_t          *ep;    /* current extent entry pointer */
+       int                     error;  /* error return value */
+       int                     flags;  /* inode logging flags */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_del_extent";
+#endif
+       xfs_bmbt_irec_t         got;    /* current extent entry */
+       xfs_fileoff_t           got_endoff;     /* first offset past got */
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_mount_t             *mp;    /* mount structure */
+       xfs_filblks_t           nblks;  /* quota/sb block count */
+       xfs_bmbt_irec_t         new;    /* new record to be inserted */
+       /* REFERENCED */
+       xfs_extnum_t            nextents;       /* number of extents in list */
+       uint                    qfield; /* quota field to update */
+       xfs_filblks_t           temp;   /* for indirect length calculations */
+       xfs_filblks_t           temp2;  /* for indirect length calculations */
+       
+       XFS_STATS_INC(xs_del_exlist);
+       mp = ip->i_mount;       
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT(idx >= 0 && idx < nextents);
+       ASSERT(del->br_blockcount > 0);
+       ep = &ifp->if_u1.if_extents[idx];
+       xfs_bmbt_get_all(ep, &got);
+       ASSERT(got.br_startoff <= del->br_startoff);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got.br_startoff + got.br_blockcount;
+       ASSERT(got_endoff >= del_endoff);
+       delay = ISNULLSTARTBLOCK(got.br_startblock);
+       ASSERT(ISNULLSTARTBLOCK(del->br_startblock) == delay);
+       flags = 0;
+       qfield = 0;
+       error = 0;
+       /*
+        * If deleting a real allocation, must free up the disk space.
+        */
+       if (!delay) {
+               flags = XFS_ILOG_CORE;
+               /*
+                * Realtime allocation.  Free it and record di_nblocks update.
+                * Block number and length are converted to realtime extent
+                * units (sb_rextsize blocks each) for the free call.
+                * NOTE(review): the free is issued against ip->i_transp
+                * rather than the tp argument -- presumably the same
+                * transaction; confirm against the callers.
+                */
+               if (whichfork == XFS_DATA_FORK &&
+                   (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+                       xfs_fsblock_t   bno;
+                       xfs_filblks_t   len;
+
+                       ASSERT(do_mod(del->br_blockcount,
+                                     mp->m_sb.sb_rextsize) == 0);
+                       ASSERT(do_mod(del->br_startblock,
+                                     mp->m_sb.sb_rextsize) == 0);
+                       bno = del->br_startblock;
+                       do_div(bno, mp->m_sb.sb_rextsize);
+                       len = del->br_blockcount;
+                       do_div(len, mp->m_sb.sb_rextsize);
+                       if (error = xfs_rtfree_extent(ip->i_transp, bno,
+                                       (xfs_extlen_t)len))
+                               goto done;
+                       do_fx = 0;
+                       nblks = len * mp->m_sb.sb_rextsize;
+                       if (XFS_IS_QUOTA_ON(mp) &&
+                           ip->i_ino != mp->m_sb.sb_uquotino &&
+                           ip->i_ino != mp->m_sb.sb_pquotino)
+                               qfield = XFS_TRANS_DQ_RTBCOUNT;
+               }
+               /*
+                * Ordinary allocation.  The blocks are not freed here; do_fx
+                * makes the end of the routine add them to flist.
+                */
+               else {
+                       do_fx = 1;
+                       nblks = del->br_blockcount;
+                       if (XFS_IS_QUOTA_ON(mp) &&
+                           ip->i_ino != mp->m_sb.sb_uquotino &&
+                           ip->i_ino != mp->m_sb.sb_pquotino)
+                               qfield = XFS_TRANS_DQ_BCOUNT;
+                       /*
+                        * If we're freeing meta-data, then the transaction
+                        * that frees the blocks must be synchronous.  This
+                        * ensures that no one can reuse the blocks before
+                        * they are permanently free.  For regular data
+                        * it is the caller's responsibility to make the
+                        * data permanently inaccessible before calling
+                        * here to free it.
+                        */
+                       if (iflags & XFS_BMAPI_METADATA)
+                               xfs_trans_set_sync(tp);
+               }
+               /*
+                * Set up del_endblock and cur for later.
+                * The cursor is positioned on the btree record for "got".
+                */
+               del_endblock = del->br_startblock + del->br_blockcount;
+               if (cur) {
+                       if (error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                       got.br_startblock, got.br_blockcount,
+                                       &i))
+                               goto done;
+                       ASSERT(i == 1);
+               }
+               da_old = da_new = 0;
+       } else {
+               da_old = STARTBLOCKVAL(got.br_startblock);
+               da_new = 0;
+               nblks = 0;
+               do_fx = 0;
+       }
+       /*
+        * Set flag value to use in switch statement.
+        * Left-contig is 2, right-contig is 1.
+        * So 3 means del covers the whole extent, 2 means del is the front
+        * of the extent, 1 means del is the back of it, and 0 means del is
+        * strictly inside it.
+        */
+       switch (((got.br_startoff == del->br_startoff) << 1) |
+               (got_endoff == del_endoff)) {
+       case 3:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                * A delayed extent has no on-disk extent count or btree
+                * record to update, so it is finished after the in-core
+                * delete.
+                */
+               xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
+               xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+               ifp->if_lastex = idx;
+               if (delay)
+                       break;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                       XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+               flags |= XFS_ILOG_CORE;
+               if (!cur) {
+                       flags |= XFS_ILOG_FEXT(whichfork);
+                       break;
+               }
+               if (error = xfs_bmbt_delete(cur, iflags & XFS_BMAPI_ASYNC, &i))
+                       goto done;
+               ASSERT(i == 1);
+               break;
+
+       case 2:
+               /*
+                * Deleting the first part of the extent.
+                * The extent now starts at del_endoff.  For a delayed
+                * extent the indirect-block reservation is recomputed for
+                * the shorter length, capped at the old reservation.
+                */
+               xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
+               xfs_bmbt_set_startoff(ep, del_endoff);
+               temp = got.br_blockcount - del->br_blockcount;
+               xfs_bmbt_set_blockcount(ep, temp);
+               ifp->if_lastex = idx;
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+                       xfs_bmap_trace_post_update(fname, "2", ip, idx,
+                               whichfork);
+                       da_new = temp;
+                       break;
+               }
+               xfs_bmbt_set_startblock(ep, del_endblock);
+               xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
+               if (!cur) {
+                       flags |= XFS_ILOG_FEXT(whichfork);
+                       break;
+               }
+               if (error = xfs_bmbt_update(cur, del_endoff, del_endblock,
+                               got.br_blockcount - del->br_blockcount,
+                               got.br_state))
+                       goto done;
+               break;
+
+       case 1:
+               /*
+                * Deleting the last part of the extent.
+                * Only the block count shrinks; start offset and start
+                * block are unchanged.  A delayed extent gets its
+                * reservation recomputed as in case 2.
+                */
+               temp = got.br_blockcount - del->br_blockcount;
+               xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
+               xfs_bmbt_set_blockcount(ep, temp);
+               ifp->if_lastex = idx;
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+                       xfs_bmap_trace_post_update(fname, "1", ip, idx,
+                               whichfork);
+                       da_new = temp;
+                       break;
+               }
+               xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
+               if (!cur) {
+                       flags |= XFS_ILOG_FEXT(whichfork);
+                       break;
+               }
+               if (error = xfs_bmbt_update(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount - del->br_blockcount,
+                               got.br_state))
+                       goto done;
+               break;
+       
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                * The existing entry is shortened to the piece before del
+                * (temp blocks) and "new" describes the piece after del
+                * (temp2 blocks), inserted at idx + 1.
+                */
+               temp = del->br_startoff - got.br_startoff;
+               xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
+               xfs_bmbt_set_blockcount(ep, temp);
+               new.br_startoff = del_endoff;
+               temp2 = got_endoff - del_endoff;
+               new.br_blockcount = temp2;
+               new.br_state = got.br_state;
+               if (!delay) {
+                       new.br_startblock = del_endblock;
+                       flags |= XFS_ILOG_CORE;
+                       if (cur) {
+                               if (error = xfs_bmbt_update(cur,
+                                               got.br_startoff,
+                                               got.br_startblock, temp,
+                                               got.br_state))
+                                       goto done;
+                               if (error = xfs_bmbt_increment(cur, 0, &i))
+                                       goto done;
+                               cur->bc_rec.b = new;
+                               error = xfs_bmbt_insert(cur, &i);
+                               if (error && error != ENOSPC)
+                                       goto done;
+                               /*
+                                * If get no-space back from btree insert,
+                                * it tried a split, and we have a zero
+                                * block reservation.
+                                * Fix up our state and return the error.
+                                */
+                               if (error == ENOSPC) {
+                                       /* 
+                                        * Reset the cursor, don't trust
+                                        * it after any insert operation.
+                                        */
+                                       if (error = xfs_bmbt_lookup_eq(cur,
+                                                       got.br_startoff,
+                                                       got.br_startblock,
+                                                       temp, &i))
+                                               goto done;
+                                       ASSERT(i == 1);
+                                       /*
+                                        * Update the btree record back
+                                        * to the original value.
+                                        */
+                                       if (error = xfs_bmbt_update(cur,
+                                                       got.br_startoff,
+                                                       got.br_startblock,
+                                                       got.br_blockcount,
+                                                       got.br_state))
+                                               goto done;
+                                       /*
+                                        * Reset the extent record back
+                                        * to the original value.
+                                        */
+                                       xfs_bmbt_set_blockcount(ep,
+                                               got.br_blockcount);
+                                       flags = 0;
+                                       error = XFS_ERROR(ENOSPC);
+                                       goto done;
+                               }
+                               ASSERT(i == 1);
+                       } else
+                               flags |= XFS_ILOG_FEXT(whichfork);
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+               } else {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       temp = xfs_bmap_worst_indlen(ip, temp);
+                       xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
+                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
+                       new.br_startblock = NULLSTARTBLOCK((int)temp2);
+                       da_new = temp + temp2;
+                       while (da_new > da_old) {       /* trim the two reservations in turn until they fit in da_old */
+                               if (temp) {
+                                       temp--;
+                                       da_new--;
+                                       xfs_bmbt_set_startblock(ep,
+                                               NULLSTARTBLOCK((int)temp));
+                               }
+                               if (da_new == da_old)
+                                       break;
+                               if (temp2) {
+                                       temp2--;
+                                       da_new--;
+                                       new.br_startblock = 
+                                               NULLSTARTBLOCK((int)temp2);
+                               }
+                       }
+               }
+               xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
+               xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
+                       whichfork);
+               xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork);
+               ifp->if_lastex = idx + 1;
+               break;
+       }
+       /*
+        * If we need to, add to list of extents to delete.
+        */
+       if (do_fx)
+               xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
+                       mp);
+       /*
+        * Adjust inode # blocks in the file.
+        */
+       if (nblks)
+               ip->i_d.di_nblocks -= nblks;
+       /*
+        * Adjust quota data.
+        */
+       if (qfield)
+               xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
+       /*
+        * Account for change in delayed indirect blocks.
+        * Nothing to do for disk quota accounting here.
+        */
+       ASSERT(da_old >= da_new);
+       if (da_old > da_new) 
+               xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
+                       rsvd);
+done:
+       *logflagsp = flags;     /* set on both the success and the error paths */
+       return error;
+}
+
+/*
+ * Unlink the item "free" from the free item list headed by "flist" and
+ * hand its memory back to the free-item zone.  "prev" is the item that
+ * precedes "free" on the list; it is NULL when "free" is the list head.
+ */
+STATIC void
+xfs_bmap_del_free(
+       xfs_bmap_free_t         *flist, /* free item list header */
+       xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
+       xfs_bmap_free_item_t    *free)  /* list item to be freed */
+{
+       xfs_bmap_free_item_t    **linkp;        /* link that points at free */
+
+       /*
+        * The link to rewrite is either the predecessor's next pointer
+        * or, for the first item, the list header itself.
+        */
+       linkp = prev ? &prev->xbfi_next : &flist->xbf_first;
+       *linkp = free->xbfi_next;
+       flist->xbf_count--;
+       kmem_zone_free(xfs_bmap_free_item_zone, free);
+}
+
+/*
+ * Drop "count" entries, beginning at index "idx", from the in-core
+ * extents array of the given fork of inode "ip".  The entries that
+ * follow the deleted range are slid down over it, and the array is
+ * then shrunk by "count" entries.
+ */
+STATIC void
+xfs_bmap_delete_exlist(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    idx,            /* starting delete index */
+       xfs_extnum_t    count,          /* count of items to delete */
+       int             whichfork)      /* data or attr fork */
+{
+       xfs_bmbt_rec_t  *dst;           /* first deleted entry */
+       xfs_ifork_t     *ifp = XFS_IFORK_PTR(ip, whichfork);
+       xfs_extnum_t    remaining;      /* extents left once count are gone */
+       xfs_bmbt_rec_t  *src;           /* first entry past the deleted ones */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       dst = ifp->if_u1.if_extents + idx;
+       src = dst + count;
+       remaining = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
+       /*
+        * Source and destination overlap, hence ovbcopy.
+        */
+       ovbcopy(src, dst, (remaining - idx) * sizeof(*dst));
+       xfs_iext_realloc(ip, -count, whichfork);
+}
+
+/*
+ * Convert an extents-format file into a btree-format file.
+ * The new file will have a root block (in the inode) and a single child block.
+ *
+ * On success *curp is set to a newly created bmap btree cursor (it must be
+ * NULL on entry), *firstblock is updated to the block allocated for the
+ * child, and *logflagsp tells the caller which parts of the inode to log.
+ * If the block allocation returns an error, the incore root and the cursor
+ * are released, the fork is put back into extents format, *logflagsp is 0
+ * and the error is returned.
+ */
+STATIC int                                     /* error */
+xfs_bmap_extents_to_btree(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       xfs_fsblock_t           *firstblock,    /* first-block-allocated */
+       xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
+       xfs_btree_cur_t         **curp,         /* cursor returned to caller */
+       int                     wasdel,         /* converting a delayed alloc */
+       int                     *logflagsp,     /* inode logging flags */
+       int                     whichfork)      /* data or attr fork */
+{
+       xfs_bmbt_block_t        *ablock;        /* allocated (child) bt block */
+       xfs_buf_t               *abp;           /* buffer for ablock */
+       xfs_alloc_arg_t         args;           /* allocation arguments */
+       xfs_bmbt_rec_t          *arp;           /* child record pointer */
+       xfs_bmbt_block_t        *block;         /* btree root block */
+       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
+       xfs_bmbt_rec_t          *ep;            /* extent list pointer */
+       int                     error;          /* error return value */
+       xfs_extnum_t            i;              /* extent list index */
+       xfs_ifork_t             *ifp;           /* inode fork pointer */
+       xfs_bmbt_key_t          *kp;            /* root block key pointer */
+       xfs_mount_t             *mp;            /* mount structure */
+       xfs_extnum_t            nextents;       /* extent list size */
+       xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       /*
+        * Make space in the inode incore.
+        */
+       xfs_iroot_realloc(ip, 1, whichfork);
+       ifp->if_flags |= XFS_IFBROOT;
+       /*
+        * Fill in the root.
+        */
+       block = ifp->if_broot;
+       INT_SET(block->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+       INT_SET(block->bb_level, ARCH_CONVERT, 1);
+       INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+       INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+       INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+       /*
+        * Need a cursor.  Can't allocate until bb_level is filled in.
+        */
+       mp = ip->i_mount;
+       cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+               whichfork);
+       cur->bc_private.b.firstblock = *firstblock;
+       cur->bc_private.b.flist = flist;
+       cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+       /*
+        * Convert to a btree with two levels, one record in root.
+        */
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+       args.tp = tp;
+       args.mp = mp;
+       /*
+        * Pick where to look for the child block: start from the inode's
+        * own block if nothing has been allocated in this transaction yet,
+        * otherwise work from the first block already allocated.
+        */
+       if (*firstblock == NULLFSBLOCK) {
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
+       } else if (flist->xbf_low) {
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               args.fsbno = *firstblock;
+       } else {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.fsbno = *firstblock;
+       }
+       args.minlen = args.maxlen = args.prod = 1;
+       args.total = args.minleft = args.alignment = args.mod = args.isfl =
+               args.minalignslop = 0;
+       args.wasdel = wasdel;
+       *logflagsp = 0;
+       if ((error = xfs_alloc_vextent(&args))) {
+               /*
+                * Back out the conversion.  The incore root is given back,
+                * so the fork must not be left marked as btree format;
+                * return it to extents format, which is what it was on
+                * entry (asserted above).
+                */
+               xfs_iroot_realloc(ip, -1, whichfork);
+               XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+               return error;
+       }
+       /*
+        * Allocation can't fail, the space was reserved.
+        */
+       ASSERT(args.fsbno != NULLFSBLOCK);
+       ASSERT(*firstblock == NULLFSBLOCK ||
+              args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
+              (flist->xbf_low &&
+               args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+       *firstblock = cur->bc_private.b.firstblock = args.fsbno;
+       cur->bc_private.b.allocated++;
+       ip->i_d.di_nblocks++;
+       /*
+        * Charge the new block to quota, except for the quota inodes
+        * themselves.
+        */
+       if (XFS_IS_QUOTA_ON(mp) &&
+           ip->i_ino != mp->m_sb.sb_uquotino &&
+           ip->i_ino != mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+       abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+       /*
+        * Fill in the child block.
+        */
+       ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+       INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+       INT_ZERO(ablock->bb_level, ARCH_CONVERT);
+       INT_ZERO(ablock->bb_numrecs, ARCH_CONVERT);
+       INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+       INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+       arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       /*
+        * Copy the incore records into the child, leaving out any whose
+        * start block tests true for ISNULLSTARTBLOCK.
+        */
+       for (ep = ifp->if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+               if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
+                       *arp++ = *ep;
+                       INT_MOD(ablock->bb_numrecs, ARCH_CONVERT, +1);
+               }
+       }
+       ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
+       /*
+        * Fill in the root key and pointer.
+        */
+       kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+       arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+       INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(arp));
+       pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+       INT_SET(*pp, ARCH_CONVERT, args.fsbno);
+       /*
+        * Do all this logging at the end so that
+        * the root is at the right level.
+        */
+       xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
+       xfs_bmbt_log_recs(cur, abp, 1, INT_GET(ablock->bb_numrecs, ARCH_CONVERT));
+       ASSERT(*curp == NULL);
+       *curp = cur;
+       *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
+       return 0;
+}
+
+/*
+ * Insert "count" new records, taken from "new", into the extent array of
+ * inode "ip" so that the first of them lands at index "idx".  The array
+ * is grown first and the existing records from "idx" onward are moved up
+ * to make room.
+ */
+STATIC void
+xfs_bmap_insert_exlist(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    idx,            /* starting index of new items */
+       xfs_extnum_t    count,          /* number of inserted items */
+       xfs_bmbt_irec_t *new,           /* items to insert */
+       int             whichfork)      /* data or attr fork */
+{
+       xfs_extnum_t    i;              /* index into "new" */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       xfs_extnum_t    nextents;       /* extent list size after growing */
+       xfs_bmbt_rec_t  *slot;          /* first slot to be filled in */
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       /*
+        * Grow the array before looking at its address or size.
+        */
+       xfs_iext_realloc(ip, count, whichfork);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       slot = &ifp->if_u1.if_extents[idx];
+       ovbcopy(slot, slot + count,
+               (nextents - (idx + count)) * sizeof(*slot));
+       for (i = 0; i < count; i++)
+               xfs_bmbt_set_all(slot + i, &new[i]);
+}
+
+/*
+ * Convert a local file to an extents file.
+ * This code is out of bounds for data forks of regular files,
+ * since the file data needs to get logged so things will stay consistent.
+ * (The bmap-level manipulations are ok, though).
+ *
+ * If the fork holds any inline bytes, one block is allocated, the inline
+ * data is copied into that block's buffer and the buffer is logged, the
+ * inline data area is released and the fork gets a single extent record
+ * for the new block.  A fork with no inline bytes only has its flags and
+ * format switched.  *logflagsp is always written before returning; if
+ * the allocation call returns an error it is 0, the fork is left in
+ * local format and the error is returned.
+ */
+STATIC int                             /* error */
+xfs_bmap_local_to_extents(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
+       xfs_extlen_t    total,          /* total blocks needed by transaction */
+       int             *logflagsp,     /* inode logging flags */
+       int             whichfork)      /* data or attr fork */
+{
+       int             error;          /* error return value */
+       int             flags;          /* logging flags returned */
+#ifdef XFS_BMAP_TRACE
+       static char     fname[] = "xfs_bmap_local_to_extents";
+#endif
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+
+       /*
+        * We don't want to deal with the case of keeping inode data inline yet.
+        * So sending the data fork of a regular inode is illegal.
+        */
+       ASSERT(!((ip->i_d.di_mode & IFMT) == IFREG && 
+                whichfork == XFS_DATA_FORK));
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+       flags = 0;
+       error = 0;
+       if (ifp->if_bytes) {
+               xfs_alloc_arg_t args;   /* allocation arguments */
+               xfs_buf_t               *bp;    /* buffer for extent list block */
+               xfs_bmbt_rec_t  *ep;    /* extent list pointer */
+
+               args.tp = tp;
+               args.mp = ip->i_mount;
+               ASSERT(ifp->if_flags & XFS_IFINLINE);
+               /*
+                * Allocate a block.  We know we need only one, since the
+                * file currently fits in an inode.  Start from the inode's
+                * own block when nothing has been allocated in this
+                * transaction yet, otherwise ask for a block near the
+                * first one already allocated.
+                */
+               if (*firstblock == NULLFSBLOCK) {
+                       args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
+                       args.type = XFS_ALLOCTYPE_START_BNO;
+               } else {
+                       args.fsbno = *firstblock;
+                       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               }
+               args.total = total;
+               args.mod = args.minleft = args.alignment = args.wasdel =
+                       args.isfl = args.minalignslop = 0;
+               args.minlen = args.maxlen = args.prod = 1;
+               if (error = xfs_alloc_vextent(&args))
+                       goto done;      /* flags is still 0 here */
+               /* 
+                * Can't fail, the space was reserved.
+                */
+               ASSERT(args.fsbno != NULLFSBLOCK);
+               ASSERT(args.len == 1);
+               *firstblock = args.fsbno;
+               bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+               bcopy(ifp->if_u1.if_data, (char *)XFS_BUF_PTR(bp),
+                       ifp->if_bytes);
+               xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+               xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);       /* drop inline data */
+               xfs_iext_realloc(ip, 1, whichfork);     /* room for one record */
+               ep = ifp->if_u1.if_extents;
+               xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
+               xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+               XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+               ip->i_d.di_nblocks = 1; /* NOTE(review): overwrites, not increments - confirm intended for attr forks */
+               if (XFS_IS_QUOTA_ON(args.mp) &&
+                   ip->i_ino != args.mp->m_sb.sb_uquotino &&
+                   ip->i_ino != args.mp->m_sb.sb_pquotino)
+                       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
+                               1L);
+               flags |= XFS_ILOG_FEXT(whichfork);
+       } else
+               ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+       ifp->if_flags &= ~XFS_IFINLINE;
+       ifp->if_flags |= XFS_IFEXTENTS;
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+       flags |= XFS_ILOG_CORE;
+done:
+       *logflagsp = flags;
+       return error;
+}
+
+/*
+ * Search the extent array "base", holding "nextents" records, for the
+ * record containing file block "bno".  "lastx" is a hint: the record at
+ * that index and the one after it are tried before falling back to a
+ * binary search.
+ *
+ * Returns a pointer to the record containing bno.  If the binary search
+ * finds no such record, the record following bno is returned instead, or
+ * NULL with *eofp set to 1 when bno lies beyond the last record.  An
+ * empty array also returns NULL with *eofp set.  *lastxp and *gotp are
+ * always written.  prevp->br_startoff is NULLFILEOFF unless the binary
+ * search path filled *prevp in with the record preceding the result.
+ */
+xfs_bmbt_rec_t *                       /* pointer to found extent entry */
+xfs_bmap_do_search_extents(
+       xfs_bmbt_rec_t  *base,          /* base of extent list */
+       xfs_extnum_t    lastx,          /* last extent index used */
+       xfs_extnum_t    nextents,       /* extent list size */
+       xfs_fileoff_t   bno,            /* block number searched for */
+       int             *eofp,          /* out: end of file found */
+       xfs_extnum_t    *lastxp,        /* out: last extent index */
+       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
+       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
+{
+       xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
+       /*
+        * Start from all zeroes: with an empty extent list nothing below
+        * stores into "got", yet it is still copied out through gotp.
+        */
+       xfs_bmbt_irec_t got = { 0 };    /* extent list entry, decoded */
+       int             high;           /* high index of binary search */
+       int             low;            /* low index of binary search */
+
+       if (lastx != NULLEXTNUM && lastx < nextents)
+               ep = base + lastx;
+       else
+               ep = NULL;
+       prevp->br_startoff = NULLFILEOFF;
+       if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
+           bno < got.br_startoff +
+                 (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
+               *eofp = 0;
+       else if (ep && lastx < nextents - 1 &&
+                bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
+                bno < got.br_startoff +
+                      (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
+               lastx++;
+               ep++;
+               *eofp = 0;
+       } else if (nextents == 0)
+               *eofp = 1;
+       else if (bno == 0 &&
+                (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
+               ep = base;
+               lastx = 0;
+               got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+               *eofp = 0;
+       } else {
+               /* binary search the extents array */
+               low = 0;
+               high = nextents - 1;
+               while (low <= high) {
+                       XFS_STATS_INC(xs_cmp_exlist);
+                       lastx = (low + high) >> 1;
+                       ep = base + lastx;
+                       got.br_startoff = xfs_bmbt_get_startoff(ep);
+                       got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+                       if (bno < got.br_startoff)
+                               high = lastx - 1;
+                       else if (bno >= got.br_startoff + got.br_blockcount)
+                               low = lastx + 1;
+                       else {
+                               got.br_startblock = xfs_bmbt_get_startblock(ep);
+                               got.br_state = xfs_bmbt_get_state(ep);
+                               *eofp = 0;
+                               *lastxp = lastx;
+                               *gotp = got;
+                               return ep;
+                       }
+               }
+               /*
+                * No record contains bno; "ep"/"got" describe the last
+                * record probed.  Work out which side of it bno falls on.
+                */
+               if (bno >= got.br_startoff + got.br_blockcount) {
+                       lastx++;
+                       if (lastx == nextents) {
+                               *eofp = 1;
+                               got.br_startblock = xfs_bmbt_get_startblock(ep);
+                               got.br_state = xfs_bmbt_get_state(ep);
+                               *prevp = got;
+                               ep = NULL;
+                       } else {
+                               *eofp = 0;
+                               xfs_bmbt_get_all(ep, prevp);
+                               ep++;
+                               got.br_startoff = xfs_bmbt_get_startoff(ep);
+                               got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+                       }
+               } else {
+                       *eofp = 0;
+                       if (ep > base)
+                               xfs_bmbt_get_all(ep - 1, prevp);
+               }
+       }
+       if (ep) {
+               got.br_startblock = xfs_bmbt_get_startblock(ep);
+               got.br_state = xfs_bmbt_get_state(ep);
+       }
+       *lastxp = lastx;
+       *gotp = got;
+       return ep;
+}
+
+/*
+ * Look up file block "bno" in the incore extent list of the given fork
+ * of inode "ip".  This bumps the lookup statistic and hands the fork's
+ * extent array, its size and its last-used index to
+ * xfs_bmap_do_search_extents(), whose result and out-parameters are
+ * passed straight back to the caller.
+ */
+STATIC xfs_bmbt_rec_t *                 /* pointer to found extent entry */
+xfs_bmap_search_extents(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_fileoff_t   bno,            /* block number searched for */
+       int             whichfork,      /* data or attr fork */
+       int             *eofp,          /* out: end of file found */
+       xfs_extnum_t    *lastxp,        /* out: last extent index */
+       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
+       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
+{
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+
+       XFS_STATS_INC(xs_look_exlist);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       return xfs_bmap_do_search_extents(ifp->if_u1.if_extents,
+                       ifp->if_lastex,
+                       ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
+                       bno, eofp, lastxp, gotp, prevp);
+}
+
+/*
+ * Work out the worst-case number of indirect (bmap btree) blocks needed
+ * for a delayed extent of "len" blocks belonging to inode "ip".
+ * Level by level, the count of blocks needed is the previous count
+ * divided by the per-block record maximum, rounded up; once a level needs
+ * just one block, one more block is added for each level still above it.
+ */
+STATIC xfs_filblks_t
+xfs_bmap_worst_indlen(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_filblks_t   len)            /* delayed extent length */
+{
+       int             level;          /* btree level number */
+       int             maxrecs;        /* maximum record count at this level */
+       xfs_mount_t     *mp;            /* mount structure */
+       xfs_filblks_t   total;          /* running block total */
+
+       mp = ip->i_mount;
+       maxrecs = mp->m_bmap_dmxr[0];
+       total = 0;
+       level = 0;
+       while (level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) {
+               len += maxrecs - 1;
+               do_div(len, maxrecs);
+               total += len;
+               if (len == 1)
+                       return total + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+                               level - 1;
+               /*
+                * Levels above the first use the second record maximum.
+                */
+               if (level == 0)
+                       maxrecs = mp->m_bmap_dmxr[1];
+               level++;
+       }
+       return total;
+}
+
+/*
+ * Add the extent to the list of extents to be free at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+/* ARGSUSED */
+void
+xfs_bmap_add_free(
+       xfs_fsblock_t           bno,            /* fs block number of extent */
+       xfs_filblks_t           len,            /* length of extent */
+       xfs_bmap_free_t         *flist,         /* list of extents */
+       xfs_mount_t             *mp)            /* mount point structure */
+{
+       xfs_bmap_free_item_t    *cur;           /* current (next) element */
+       xfs_bmap_free_item_t    *new;           /* new element */
+       xfs_bmap_free_item_t    *prev;          /* previous element */
+#ifdef DEBUG
+       xfs_agnumber_t          agno;
+       xfs_agblock_t           agbno;
+
+       ASSERT(bno != NULLFSBLOCK);
+       ASSERT(len > 0);
+       ASSERT(len <= MAXEXTLEN);
+       ASSERT(!ISNULLSTARTBLOCK(bno));
+       agno = XFS_FSB_TO_AGNO(mp, bno);
+       agbno = XFS_FSB_TO_AGBNO(mp, bno);
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(agbno < mp->m_sb.sb_agblocks);
+       ASSERT(len < mp->m_sb.sb_agblocks);
+       ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+#endif
+       ASSERT(xfs_bmap_free_item_zone != NULL);
+       new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+       new->xbfi_startblock = bno;
+       new->xbfi_blockcount = (xfs_extlen_t)len;
+       for (prev = NULL, cur = flist->xbf_first;
+            cur != NULL;
+            prev = cur, cur = cur->xbfi_next) {
+               if (cur->xbfi_startblock >= bno)
+                       break;
+       }
+       if (prev)
+               prev->xbfi_next = new;
+       else
+               flist->xbf_first = new;
+       new->xbfi_next = cur;
+       flist->xbf_count++;
+}
+
+/*
+ * Work out the maximum depth a bmap btree can reach in this filesystem
+ * for the given fork and store it in mp->m_bm_maxlevels[whichfork].
+ * Done once, during mount.
+ */
+void
+xfs_bmap_compute_maxlevels(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       int             whichfork)      /* data or attr fork */
+{
+       int             level;          /* btree level */
+       uint            maxblocks;      /* max blocks at this level */
+       uint            maxleafents;    /* max leaf entries possible */
+       int             maxrootrecs;    /* max records in root block */
+       int             minleafrecs;    /* min records in leaf block */
+       int             minnoderecs;    /* min records in node block */
+       int             sz;             /* root block size */
+
+       /*
+        * The extent count of a file, and so the number of leaf entries,
+        * is bounded by the type of di_nextents (a signed 32-bit number,
+        * xfs_extnum_t) for the data fork, or of di_anextents (a signed
+        * 16-bit number, xfs_aextnum_t) for the attr fork.  The space
+        * available to the root also depends on the fork.
+        */
+       if (whichfork == XFS_DATA_FORK) {
+               maxleafents = MAXEXTNUM;
+               sz = mp->m_attroffset;
+       } else {
+               maxleafents = MAXAEXTNUM;
+               sz = mp->m_sb.sb_inodesize - mp->m_attroffset;
+       }
+       minleafrecs = mp->m_bmap_dmnr[0];
+       minnoderecs = mp->m_bmap_dmnr[1];
+       maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+       /*
+        * Add a level for each step up the tree until everything below
+        * can be pointed at from the root.
+        */
+       level = 1;
+       while (maxblocks > 1) {
+               maxblocks = (maxblocks <= maxrootrecs) ? 1 :
+                       (maxblocks + minnoderecs - 1) / minnoderecs;
+               level++;
+       }
+       mp->m_bm_maxlevels[whichfork] = level;
+}
+
+/*
+ * Find the first run of at least "len" logically contiguous unused
+ * file blocks at or after the offset passed in through *first_unused,
+ * and store its starting file-relative block number back in
+ * *first_unused.  That is the lowest suitable hole if there is one, else
+ * the first block past the last extent (or the input offset, whichever
+ * is greater).
+ * For a local (in-inode) fork *first_unused is set to 0.
+ * The return value is 0, or the error from reading in the extents.
+ */
+int                                            /* error */
+xfs_bmap_first_unused(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_extlen_t    len,                    /* size of hole to find */
+       xfs_fileoff_t   *first_unused,          /* unused block */
+       int             whichfork)              /* data or attr fork */
+{
+       xfs_bmbt_rec_t  *base;                  /* base of extent array */
+       int             error;                  /* error return value */
+       xfs_extnum_t    i;                      /* extent array index */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_fileoff_t   lastaddr;               /* end of the current extent */
+       xfs_fileoff_t   lowest;                 /* lowest useful block */
+       xfs_fileoff_t   max;                    /* candidate start of hole */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+       xfs_fileoff_t   off;                    /* start of the current extent */
+
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
+              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               *first_unused = 0;
+               return 0;
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+       lowest = *first_unused;
+       max = lowest;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       base = ifp->if_u1.if_extents;
+       for (i = 0; i < nextents; i++) {
+               off = xfs_bmbt_get_startoff(&base[i]);
+               /*
+                * Stop as soon as the hole in front of this extent is
+                * big enough and lies at or beyond "lowest".
+                */
+               if (off >= lowest + len && off - max >= len)
+                       break;
+               lastaddr = off + xfs_bmbt_get_blockcount(&base[i]);
+               max = XFS_FILEOFF_MAX(lastaddr, lowest);
+       }
+       *first_unused = max;
+       return 0;
+}
+
+/*
+ * On entry *last_block is a file-relative block number; on return it is
+ * the block number just past the last mapped block that lies before that
+ * input value.  The answer comes from the extent list, not from i_size.
+ * Local forks have no extent list, so *last_block is set to 0 for them.
+ * Returns EIO for a fork that is not in btree, extents or local format,
+ * or the error from reading in the extents; otherwise 0.
+ */
+int                                            /* error */
+xfs_bmap_last_before(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_fileoff_t   *last_block,            /* last block */
+       int             whichfork)              /* data or attr fork */
+{
+       xfs_fileoff_t   bno;                    /* input file offset */
+       int             eof;                    /* hit end of file */
+       xfs_bmbt_rec_t  *ep;                    /* pointer to last extent */
+       int             error;                  /* error return value */
+       xfs_bmbt_irec_t got;                    /* current extent value */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_extnum_t    lastx;                  /* last extent used */
+       xfs_bmbt_irec_t prev;                   /* previous extent value */
+
+       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
+               *last_block = 0;
+               return 0;
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               break;
+       default:
+               return XFS_ERROR(EIO);
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+       bno = *last_block - 1;
+       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+       /*
+        * If bno itself is mapped, *last_block is already the right answer.
+        */
+       if (!eof && xfs_bmbt_get_startoff(ep) <= bno)
+               return 0;
+       /*
+        * Otherwise fall back to the end of the preceding extent, if any.
+        */
+       *last_block = (prev.br_startoff == NULLFILEOFF) ? 0 :
+               prev.br_startoff + prev.br_blockcount;
+       return 0;
+}
+
+/*
+ * Store in *last_block the file-relative block number just past the end
+ * of the last extent of the fork.  This comes from the extent list, not
+ * from i_size.  For local forks, which have no extent list, and for
+ * forks with no extents, *last_block is set to 0.
+ * Returns EIO for a fork that is not in btree, extents or local format,
+ * or the error from reading in the extents; otherwise 0.
+ */
+int                                            /* error */
+xfs_bmap_last_offset(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_fileoff_t   *last_block,            /* last block */
+       int             whichfork)              /* data or attr fork */
+{
+       xfs_bmbt_rec_t  *base;                  /* base of extent array */
+       int             error;                  /* error return value */
+       int             fmt;                    /* format of the fork */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_bmbt_rec_t  *last;                  /* pointer to last extent */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+
+       fmt = XFS_IFORK_FORMAT(ip, whichfork);
+       if (fmt == XFS_DINODE_FMT_LOCAL) {
+               *last_block = 0;
+               return 0;
+       }
+       if (fmt != XFS_DINODE_FMT_BTREE && fmt != XFS_DINODE_FMT_EXTENTS)
+               return XFS_ERROR(EIO);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *last_block = 0;
+               return 0;
+       }
+       base = ifp->if_u1.if_extents;
+       ASSERT(base != NULL);
+       last = base + (nextents - 1);
+       *last_block = xfs_bmbt_get_startoff(last) +
+               xfs_bmbt_get_blockcount(last);
+       return 0;
+}
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not.  For the data fork we check this matches di_size,
+ * implying the file's range is 0..bsize-1.
+ */
+int                                    /* 1=>1 block, 0=>otherwise */
+xfs_bmap_one_block(
+       xfs_inode_t     *ip,            /* incore inode */
+       int             whichfork)      /* data or attr fork */
+{
+       xfs_bmbt_rec_t  *ep;            /* ptr to fork's extent */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       int             rval;           /* return value */
+       xfs_bmbt_irec_t s;              /* internal version of extent */
+
+#ifndef DEBUG
+       if (whichfork == XFS_DATA_FORK)
+               return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize;
+#endif /* !DEBUG */
+       if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
+               return 0;
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               return 0;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       ep = ifp->if_u1.if_extents;
+       xfs_bmbt_get_all(ep, &s);
+       rval = s.br_startoff == 0 && s.br_blockcount == 1;
+       if (rval && whichfork == XFS_DATA_FORK)
+               ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
+       return rval;
+}
+
+/*
+ * Read in the extents to if_extents.
+ * All inode fields are set up by caller, we just traverse the btree
+ * and copy the records in. If the file system cannot contain unwritten
+ * extents, the records are checked for no "state" flags.
+ */
+int                                    /* error */
+xfs_bmap_read_extents(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_inode_t             *ip,    /* incore inode */
+       int                     whichfork) /* data or attr fork */
+{
+       xfs_bmbt_block_t        *block; /* current btree block */
+       xfs_fsblock_t           bno;    /* block # of "block" */
+       xfs_buf_t                       *bp;    /* buffer for "block" */
+       int                     error;  /* error return value */
+       xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_bmap_read_extents";
+#endif
+       xfs_extnum_t            i;      /* index into the extents list */
+       xfs_ifork_t             *ifp;   /* fork structure */
+       int                     level;  /* btree level, for checking */
+       xfs_mount_t             *mp;    /* file system mount structure */
+       xfs_bmbt_ptr_t          *pp;    /* pointer to block address */
+       /* REFERENCED */
+       xfs_extnum_t            room;   /* number of entries there's room for */
+       xfs_bmbt_rec_t          *trp;   /* target record pointer */
+
+       bno = NULLFSBLOCK;
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
+                                       XFS_EXTFMT_INODE(ip);
+       block = ifp->if_broot;
+       /*
+        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+        */
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+       level = INT_GET(block->bb_level, ARCH_CONVERT);
+       pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+       ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
+       ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
+       bno = INT_GET(*pp, ARCH_CONVERT);
+       /*
+        * Go down the tree until leaf level is reached, following the first
+        * pointer (leftmost) at each level.
+        */
+       while (level-- > 0) {
+               if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF))
+                       return error;
+               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               XFS_WANT_CORRUPTED_GOTO(
+                       XFS_BMAP_SANITY_CHECK(mp, block, level),
+                       error0);
+               if (level == 0)
+                       break;
+               pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
+                       1, mp->m_bmap_dmxr[1]);
+               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0);
+               bno = INT_GET(*pp, ARCH_CONVERT);
+               xfs_trans_brelse(tp, bp);
+       }
+       /*
+        * Here with bp and block set to the leftmost leaf node in the tree.
+        */
+       room = ifp->if_bytes / (uint)sizeof(*trp);
+       trp = ifp->if_u1.if_extents;
+       i = 0;
+       /*
+        * Loop over all leaf nodes.  Copy information to the extent list.
+        */
+       for (;;) {
+               xfs_bmbt_rec_t  *frp;
+               xfs_fsblock_t   nextbno;
+               xfs_extnum_t    num_recs;
+
+
+               num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+               if (i + num_recs > room) {
+                       ASSERT(i + num_recs <= room);
+                       xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+       "corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",
+                       ip->i_ino);
+                       goto error0;
+               }
+               XFS_WANT_CORRUPTED_GOTO(
+                       XFS_BMAP_SANITY_CHECK(mp, block, 0),
+                       error0);
+               /*
+                * Read-ahead the next leaf block, if any.
+                */
+               nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+               if (nextbno != NULLFSBLOCK)
+                       xfs_btree_reada_bufl(mp, nextbno, 1);
+               /*
+                * Copy records into the extent list.
+                */
+               frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                       block, 1, mp->m_bmap_dmxr[0]);
+               bcopy(frp, trp, num_recs * sizeof(*frp));
+               if (exntf == XFS_EXTFMT_NOSTATE) {
+                       /*
+                        * Check all attribute bmap btree records and
+                        * any "older" data bmap btree records for a 
+                        * set bit in the "extent flag" position.
+                        */
+                       if (xfs_check_nostate_extents(trp, num_recs)) {
+                               goto error0;
+                       }
+               }
+               trp += num_recs;
+               i += num_recs;
+               xfs_trans_brelse(tp, bp);
+               bno = nextbno;
+               /*
+                * If we've reached the end, stop.
+                */
+               if (bno == NULLFSBLOCK)
+                       break;
+               if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF))
+                       return error;
+               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+       }
+       ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp));
+       ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+       xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+       return 0;
+error0:
+       xfs_trans_brelse(tp, bp);
+       return XFS_ERROR(EFSCORRUPTED);
+}
+
+/*
+ * Map file blocks to filesystem blocks.
+ * File range is given by the bno/len pair.
+ * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
+ * into a hole or past eof.
+ * Only allocates blocks from a single allocation group,
+ * to avoid locking problems.
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ *
+ * Flags examined by this routine:
+ *   XFS_BMAPI_ATTRFORK   work on the attribute fork, else the data fork.
+ *   XFS_BMAPI_WRITE      write request; holes and delayed extents in the
+ *                        range are given space.
+ *   XFS_BMAPI_DELAY      new space is only reserved (the extent gets a
+ *                        NULLSTARTBLOCK value), xfs_bmap_alloc is not called.
+ *   XFS_BMAPI_ENTIRE     return whole extents; without it each mapping
+ *                        is trimmed to the requested range.
+ *   XFS_BMAPI_METADATA   the allocation is not for user data.
+ *   XFS_BMAPI_EXACT      when converting a delayed extent, convert only
+ *                        the part that was asked for.
+ *   XFS_BMAPI_RSVBLOCKS  passed on to xfs_mod_incore_sb and
+ *                        xfs_bmap_add_extent as "rsvd".
+ *   XFS_BMAPI_CONTIG     minimum allocation length equals the full length.
+ *   XFS_BMAPI_IGSTATE    merge neighbouring mappings even if their state
+ *                        differs; also turns off "wr", so nothing is
+ *                        allocated.
+ *   XFS_BMAPI_PREALLOC   newly allocated extents are marked unwritten if
+ *                        the superblock has the extent flag bit; existing
+ *                        unwritten extents are not converted.
+ *
+ * On entry *nmap is the number of entries available in mval; on return
+ * through the end of the function it is the number of entries filled in.
+ *
+ * Returns 0 or an error.  Errors generated directly here: EFSCORRUPTED
+ * when the fork format is not extents, btree or local; EIO when
+ * XFS_FORCED_SHUTDOWN is true for the mount; EDQUOT (with *nmap set to 0)
+ * when the quota reservation for a delayed allocation fails before any
+ * mapping has been produced.  Other errors come from the helpers called.
+ */
+int                                    /* error */
+xfs_bmapi(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *ip,            /* incore inode */
+       xfs_fileoff_t   bno,            /* starting file offs. mapped */
+       xfs_filblks_t   len,            /* length to map in file */
+       int             flags,          /* XFS_BMAPI_... */
+       xfs_fsblock_t   *firstblock,    /* first allocated block
+                                          controls a.g. for allocs */
+       xfs_extlen_t    total,          /* total blocks needed */
+       xfs_bmbt_irec_t *mval,          /* output: map values */
+       int             *nmap,          /* i/o: mval size/count */
+       xfs_bmap_free_t *flist)         /* i/o: list extents to free */
+{
+       xfs_fsblock_t   abno;           /* allocated block number */
+       xfs_extlen_t    alen;           /* allocated extent length */
+       xfs_fileoff_t   aoff;           /* allocated file offset */
+       xfs_bmalloca_t  bma;            /* args for xfs_bmap_alloc */
+       int             contig;         /* allocation must be one extent */
+       xfs_btree_cur_t *cur;           /* bmap btree cursor */
+       char            delay;          /* this request is for delayed alloc */
+       xfs_fileoff_t   end;            /* end of mapped file region */
+       int             eof;            /* we've hit the end of extent list */
+       xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
+       int             error;          /* error return */
+       char            exact;          /* don't do all of wasdelayed extent */
+       xfs_bmbt_irec_t got;            /* current extent list record */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       xfs_extlen_t    indlen;         /* indirect blocks length */
+       char            inhole;         /* current location is hole in file */
+       xfs_extnum_t    lastx;          /* last useful extent number */
+       int             logflags;       /* flags for transaction logging */
+       xfs_extlen_t    minleft;        /* min blocks left after allocation */
+       xfs_extlen_t    minlen;         /* min allocation size */
+       xfs_mount_t     *mp;            /* xfs mount structure */
+       int             n;              /* current extent index */
+       int             nallocs;        /* number of extents alloc'd */
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       xfs_fileoff_t   obno;           /* old block number (offset) */
+       xfs_bmbt_irec_t prev;           /* previous extent list record */
+       int             stateless;      /* ignore state flag set */
+       int             tmp_logflags;   /* temp flags holder */
+       char            trim;           /* output trimmed to match range */
+       char            userdata;       /* allocating non-metadata */
+       char            wasdelay;       /* old extent was delayed */
+       int             whichfork;      /* data or attr fork */
+       char            wr;             /* this is a write request */
+       int             rsvd;           /* OK to allocate reserved blocks */
+#ifdef DEBUG
+       xfs_fileoff_t   orig_bno;       /* original block number value */
+       int             orig_flags;     /* original flags arg value */
+       xfs_filblks_t   orig_len;       /* original value of len arg */
+       xfs_bmbt_irec_t *orig_mval;     /* original value of mval */
+       int             orig_nmap;      /* original value of *nmap */
+
+       orig_bno = bno;
+       orig_len = len;
+       orig_flags = flags;
+       orig_mval = mval;
+       orig_nmap = *nmap;
+#endif
+       ASSERT(*nmap >= 1);
+       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
+       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+               XFS_ATTR_FORK : XFS_DATA_FORK;
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+               return XFS_ERROR(EFSCORRUPTED);
+       mp = ip->i_mount;
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       if (wr = (flags & XFS_BMAPI_WRITE) != 0)        /* assigns wr, then tests it */
+               XFS_STATS_INC(xs_blk_mapw);
+       else
+               XFS_STATS_INC(xs_blk_mapr);
+       delay = (flags & XFS_BMAPI_DELAY) != 0;
+       trim = (flags & XFS_BMAPI_ENTIRE) == 0;
+       userdata = (flags & XFS_BMAPI_METADATA) == 0;
+       exact = (flags & XFS_BMAPI_EXACT) != 0;
+       rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+       contig = (flags & XFS_BMAPI_CONTIG) != 0;
+       /*
+        * stateless is used to combine extents which
+        * differ only due to the state of the extents.
+        * This technique is used from xfs_getbmap()
+        * when the caller does not wish to see the
+        * separation (which is the default).
+        *
+        * This technique is also used when writing a
+        * buffer which has been partially written,
+        * (usually by being flushed during a chunkread),
+        * to ensure one write takes place. This also
+        * prevents a change in the xfs inode extents at
+        * this time, intentionally. This change occurs
+        * on completion of the write operation, in
+        * xfs_strat_comp(), where the xfs_bmapi() call
+        * is transactioned, and the extents combined.
+        */
+       stateless = (flags & XFS_BMAPI_IGSTATE) != 0;
+       if (stateless && wr)    /* if writing unwritten space, no */
+               wr = 0;         /* allocations are allowed */
+       ASSERT(wr || !delay);
+       logflags = 0;
+       nallocs = 0;
+       cur = NULL;
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               ASSERT(wr && tp);       /* local format is only expected on a write */
+               if (error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
+                               &logflags, whichfork))
+                       goto error0;
+       }
+       if (wr && *firstblock == NULLFSBLOCK) { /* first allocating call */
+               if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
+                       minleft = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
+               else
+                       minleft = 1;
+       } else
+               minleft = 0;
+       if (!(ifp->if_flags & XFS_IFEXTENTS) && /* extent list not in core yet */
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               goto error0;
+       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       n = 0;
+       end = bno + len;
+       obno = bno;
+       bma.ip = NULL;          /* NULL means once-only bma fields are unset */
+       while (bno < end && n < *nmap) {
+               /*
+                * Reading past eof, act as though there's a hole
+                * up to end.
+                */
+               if (eof && !wr)
+                       got.br_startoff = end;
+               inhole = eof || got.br_startoff > bno;
+               wasdelay = wr && !inhole && !delay &&
+                       ISNULLSTARTBLOCK(got.br_startblock);
+               /*
+                * First, deal with the hole before the allocated space
+                * that we found, if any.
+                * The same path also converts an existing delayed
+                * extent ("wasdelay") when this is a non-delay write.
+                */
+               if (wr && (inhole || wasdelay)) {
+                       /*
+                        * For the wasdelay case, we could also just
+                        * allocate the stuff asked for in this bmap call
+                        * but that wouldn't be as good.
+                        *
+                        * The three branches below choose the extent to
+                        * create (aoff/alen): the whole delayed extent,
+                        * the requested part of the delayed extent, or
+                        * the hole (capped at MAXEXTLEN and at the start
+                        * of the next extent).
+                        */
+                       if (wasdelay && !exact) {
+                               alen = (xfs_extlen_t)got.br_blockcount;
+                               aoff = got.br_startoff;
+                               if (lastx != NULLEXTNUM && lastx) {
+                                       ep = &ifp->if_u1.if_extents[lastx - 1];
+                                       xfs_bmbt_get_all(ep, &prev);
+                               }
+                       } else if (wasdelay) {
+                               alen = (xfs_extlen_t)
+                                       XFS_FILBLKS_MIN(len,
+                                               (got.br_startoff +
+                                                got.br_blockcount) - bno);
+                               aoff = bno;
+                       } else {
+                               alen = (xfs_extlen_t)
+                                       XFS_FILBLKS_MIN(len, MAXEXTLEN);
+                               if (!eof)
+                                       alen = (xfs_extlen_t)
+                                               XFS_FILBLKS_MIN(alen,
+                                                       got.br_startoff - bno);
+                               aoff = bno;
+                       }
+                       minlen = contig ? alen : 1;
+                       if (delay) {
+                               indlen = (xfs_extlen_t)
+                                       xfs_bmap_worst_indlen(ip, alen);
+                               ASSERT(indlen > 0);
+                               /*
+                                * Make a transaction-less quota reservation for
+                                * delayed allocation blocks. This number gets
+                                * adjusted later.
+                                * We return EDQUOT if we haven't allocated
+                                * blks already inside this loop;
+                                * otherwise the loop just stops and the
+                                * mappings made so far are returned.
+                                */
+                               if (XFS_IS_QUOTA_ON(ip->i_mount) &&
+                                   xfs_trans_reserve_blkquota(NULL, ip,
+                                           (long)alen)) {
+                                       if (n == 0) {
+                                               *nmap = 0;
+                                               ASSERT(cur == NULL);
+                                               return XFS_ERROR(EDQUOT);       /* bypasses error0 */
+                                       }
+                                       break;
+                               }
+                               if (xfs_mod_incore_sb(ip->i_mount,
+                                               XFS_SBS_FDBLOCKS,
+                                               -(alen + indlen), rsvd)) {
+                                       if (XFS_IS_QUOTA_ON(ip->i_mount))       /* undo the reservation above */
+                                               xfs_trans_unreserve_blkquota(
+                                                       NULL, ip, (long)alen);
+                                       break;  /* stop mapping; no error is set here */
+                               }
+                               ip->i_delayed_blks += alen;
+                               abno = NULLSTARTBLOCK(indlen);
+                       } else {
+                               /*
+                                * If first time, allocate and fill in
+                                * once-only bma fields.
+                                */
+                               if (bma.ip == NULL) {
+                                       bma.tp = tp;
+                                       bma.ip = ip;
+                                       bma.prevp = &prev;
+                                       bma.gotp = &got;
+                                       bma.total = total;
+                                       bma.userdata = userdata;
+                               }
+                               /*
+                                * Fill in changeable bma fields.
+                                */
+                               bma.eof = eof;
+                               bma.firstblock = *firstblock;
+                               bma.alen = alen;
+                               bma.off = aoff;
+                               bma.wasdel = wasdelay;
+                               bma.minlen = minlen;
+                               bma.low = flist->xbf_low;
+                               bma.minleft = minleft;
+                               /*
+                                * Only want to do the alignment at the
+                                * eof if it is userdata and allocation length
+                                * is larger than a stripe unit.
+                                */
+                               if (mp->m_dalign && alen >= mp->m_dalign &&
+                                   userdata && whichfork == XFS_DATA_FORK) {
+                                       if (error = xfs_bmap_isaeof(ip, aoff,
+                                                       whichfork, &bma.aeof))
+                                               goto error0;
+                               } else
+                                       bma.aeof = 0;
+                               /*
+                                * Call allocator.
+                                */
+                               if (error = xfs_bmap_alloc(&bma))
+                                       goto error0;
+                               /*
+                                * Copy out result fields.
+                                */
+                               abno = bma.rval;
+                               if (flist->xbf_low = bma.low)   /* assigns xbf_low, then tests it */
+                                       minleft = 0;
+                               alen = bma.alen;
+                               aoff = bma.off;
+                               ASSERT(*firstblock == NULLFSBLOCK ||
+                                      XFS_FSB_TO_AGNO(ip->i_mount,
+                                              *firstblock) ==
+                                      XFS_FSB_TO_AGNO(ip->i_mount,
+                                              bma.firstblock) ||
+                                      (flist->xbf_low &&
+                                       XFS_FSB_TO_AGNO(ip->i_mount,
+                                               *firstblock) <
+                                       XFS_FSB_TO_AGNO(ip->i_mount,
+                                               bma.firstblock)));
+                               *firstblock = bma.firstblock;
+                               if (cur)
+                                       cur->bc_private.b.firstblock =
+                                               *firstblock;
+                               if (abno == NULLFSBLOCK)        /* allocator returned no block */
+                                       break;
+                               if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
+                                       cur = xfs_btree_init_cursor(ip->i_mount,
+                                               tp, NULL, 0, XFS_BTNUM_BMAP,
+                                               ip, whichfork);
+                                       cur->bc_private.b.firstblock =
+                                               *firstblock;
+                                       cur->bc_private.b.flist = flist;
+                               }
+                               /*
+                                * Bump the number of extents we've allocated
+                                * in this call.
+                                */
+                               nallocs++;
+                       }
+                       if (cur)
+                               cur->bc_private.b.flags =
+                                       wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
+                       got.br_startoff = aoff;
+                       got.br_startblock = abno;
+                       got.br_blockcount = alen;
+                       got.br_state = XFS_EXT_NORM;    /* assume normal */
+                       /*
+                        * Determine state of extent, and the filesystem.
+                        * A wasdelay extent has been initialized, so
+                        * shouldn't be flagged as unwritten.
+                        */
+                       if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+                               if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
+                                       got.br_state = XFS_EXT_UNWRITTEN;
+                       }
+                       error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
+                               firstblock, flist, &tmp_logflags, whichfork,
+                               rsvd);
+                       logflags |= tmp_logflags;       /* keep flags even on error, for logging at error0 */
+                       if (error)
+                               goto error0;
+                       lastx = ifp->if_lastex; /* re-read position and record after the add */
+                       ep = &ifp->if_u1.if_extents[lastx];
+                       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+                       xfs_bmbt_get_all(ep, &got);
+                       ASSERT(got.br_startoff <= aoff);
+                       ASSERT(got.br_startoff + got.br_blockcount >=
+                               aoff + alen);
+#ifdef DEBUG
+                       if (delay) {
+                               ASSERT(ISNULLSTARTBLOCK(got.br_startblock));
+                               ASSERT(STARTBLOCKVAL(got.br_startblock) > 0);
+                       }
+                       ASSERT(got.br_state == XFS_EXT_NORM ||
+                              got.br_state == XFS_EXT_UNWRITTEN);
+#endif
+                       /*
+                        * Fall down into the found allocated space case.
+                        */
+               } else if (inhole) {
+                       /*
+                        * Reading in a hole.
+                        * Report it as a HOLESTARTBLOCK mapping that runs
+                        * up to the next extent or the end of the request.
+                        */
+                       mval->br_startoff = bno;
+                       mval->br_startblock = HOLESTARTBLOCK;
+                       mval->br_blockcount =
+                               XFS_FILBLKS_MIN(len, got.br_startoff - bno);
+                       mval->br_state = XFS_EXT_NORM;
+                       bno += mval->br_blockcount;
+                       len -= mval->br_blockcount;
+                       mval++;
+                       n++;
+                       continue;       /* "got" is still the next record to look at */
+               }
+               /*
+                * Then deal with the allocated space we found.
+                */
+               ASSERT(ep != NULL);
+               if (trim && (got.br_startoff + got.br_blockcount > obno)) {
+                       if (obno > bno)
+                               bno = obno;
+                       ASSERT((bno >= obno) || (n == 0));
+                       ASSERT(bno < end);
+                       mval->br_startoff = bno;
+                       if (ISNULLSTARTBLOCK(got.br_startblock)) {
+                               ASSERT(!wr || delay);
+                               mval->br_startblock = DELAYSTARTBLOCK;
+                       } else
+                               mval->br_startblock =
+                                       got.br_startblock +
+                                       (bno - got.br_startoff);
+                       /*
+                        * Return the minimum of what we got and what we
+                        * asked for for the length.  We can use the len
+                        * variable here because it is modified below
+                        * and we could have been there before coming
+                        * here if the first part of the allocation
+                        * didn't overlap what was asked for.
+                        */
+                       mval->br_blockcount =
+                               XFS_FILBLKS_MIN(end - bno, got.br_blockcount -
+                                       (bno - got.br_startoff));
+                       mval->br_state = got.br_state;
+                       ASSERT(mval->br_blockcount <= len);
+               } else {
+                       *mval = got;    /* untrimmed: hand back the whole record */
+                       if (ISNULLSTARTBLOCK(mval->br_startblock)) {
+                               ASSERT(!wr || delay);
+                               mval->br_startblock = DELAYSTARTBLOCK;
+                       }
+               }
+
+               /*
+                * Check if writing previously allocated but
+                * unwritten extents.
+                */
+               if (wr && mval->br_state == XFS_EXT_UNWRITTEN &&
+                   ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) {
+                       /*
+                        * Modify (by adding) the state flag, if writing.
+                        */
+                       ASSERT(mval->br_blockcount <= len);
+                       if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
+                               cur = xfs_btree_init_cursor(ip->i_mount,
+                                       tp, NULL, 0, XFS_BTNUM_BMAP,
+                                       ip, whichfork);
+                               cur->bc_private.b.firstblock =
+                                       *firstblock;
+                               cur->bc_private.b.flist = flist;
+                       }
+                       mval->br_state = XFS_EXT_NORM;
+                       error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
+                               firstblock, flist, &tmp_logflags, whichfork,
+                               rsvd);
+                       logflags |= tmp_logflags;
+                       if (error)
+                               goto error0;
+                       lastx = ifp->if_lastex;
+                       ep = &ifp->if_u1.if_extents[lastx];
+                       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+                       xfs_bmbt_get_all(ep, &got);
+                       /*
+                        * We may have combined previously unwritten
+                        * space with written space, so generate
+                        * another request.
+                        * The continue leaves bno, mval and n unchanged,
+                        * so the same slot is rebuilt from the re-read
+                        * record on the next pass.
+                        */
+                       if (mval->br_blockcount < len)
+                               continue;
+               }
+
+               ASSERT(!trim ||
+                      ((mval->br_startoff + mval->br_blockcount) <= end));
+               ASSERT(!trim || (mval->br_blockcount <= len) ||
+                      (mval->br_startoff < obno));
+               bno = mval->br_startoff + mval->br_blockcount;
+               len = end - bno;
+               if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {       /* same start as previous entry: replace its length */
+                       ASSERT(mval->br_startblock == mval[-1].br_startblock);
+                       ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
+                       ASSERT(mval->br_state == mval[-1].br_state);
+                       mval[-1].br_blockcount = mval->br_blockcount;
+                       mval[-1].br_state = mval->br_state;
+               } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&   /* real blocks following on from previous entry: merge */
+                          mval[-1].br_startblock != DELAYSTARTBLOCK &&
+                          mval[-1].br_startblock != HOLESTARTBLOCK &&
+                          mval->br_startblock ==
+                          mval[-1].br_startblock + mval[-1].br_blockcount &&
+                          (stateless || mval[-1].br_state == mval->br_state)) {
+                       ASSERT(mval->br_startoff ==
+                              mval[-1].br_startoff + mval[-1].br_blockcount);
+                       mval[-1].br_blockcount += mval->br_blockcount;
+               } else if (n > 0 &&     /* two delayed entries adjacent in the file: merge */
+                          mval->br_startblock == DELAYSTARTBLOCK &&
+                          mval[-1].br_startblock == DELAYSTARTBLOCK &&
+                          mval->br_startoff ==
+                          mval[-1].br_startoff + mval[-1].br_blockcount) {
+                       mval[-1].br_blockcount += mval->br_blockcount;
+                       mval[-1].br_state = mval->br_state;
+               } else if (!((n == 0) &&
+                            ((mval->br_startoff + mval->br_blockcount) <=
+                             obno))) { /* keep it, unless it is a first entry ending at or before obno */
+                       mval++;
+                       n++;
+               }
+               /*
+                * If we're done, stop now.  Stop when we've allocated
+                * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
+                * the transaction may get too big.
+                */
+               if (bno >= end || n >= *nmap || nallocs >= *nmap)
+                       break;
+               /*
+                * Else go on to the next record.
+                */
+               ep++;
+               lastx++;
+               if (lastx >= nextents) {
+                       eof = 1;
+                       prev = got;
+               } else
+                       xfs_bmbt_get_all(ep, &got);
+       }
+       ifp->if_lastex = lastx;
+       *nmap = n;
+       /*
+        * Transform from btree to extents, give it cur.
+        * Done only inside a transaction and only when the extent count
+        * is no larger than if_ext_max.
+        */
+       if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+           XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+               ASSERT(wr && cur);
+               error = xfs_bmap_btree_to_extents(tp, ip, cur,
+                       &tmp_logflags, whichfork, 0);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
+              XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
+       error = 0;
+
+error0:
+       /*
+        * Log everything.  Do this after conversion, there's no point in
+        * logging the extent list if we've converted to btree format.
+        * This label is reached on the success path (error == 0) as well
+        * as on failure.
+        */
+       if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               logflags &= ~XFS_ILOG_FEXT(whichfork);
+       else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
+                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               logflags &= ~XFS_ILOG_FBROOT(whichfork);
+       /*
+        * Log whatever the flags say, even if error.  Otherwise we might miss
+        * detecting a case where the data is changed, there's an error,
+        * and it's not logged so we don't shutdown when we should.
+        */
+       if (logflags) {
+               ASSERT(tp && wr);
+               xfs_trans_log_inode(tp, ip, logflags);
+       }
+       if (cur) {
+               if (!error) {   /* on success, hand the cursor's firstblock back to the caller */
+                       ASSERT(*firstblock == NULLFSBLOCK ||
+                              XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) ==
+                              XFS_FSB_TO_AGNO(ip->i_mount,
+                                      cur->bc_private.b.firstblock) ||
+                              (flist->xbf_low &&
+                               XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) < 
+                               XFS_FSB_TO_AGNO(ip->i_mount,
+                                       cur->bc_private.b.firstblock)));
+                       *firstblock = cur->bc_private.b.firstblock;
+               }
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+       if (!error)     /* NOTE(review): orig_* are declared only under DEBUG; presumably xfs_bmap_validate_ret expands to nothing otherwise -- confirm */
+               xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
+                       orig_nmap, *nmap);
+       return error;
+}
+
+/*
+ * Map a file block to a filesystem block, simple version.
+ * One block (extent) only; the mapping is looked up read-only.
+ * There is no flags argument: the fork to search is given by whichfork.
+ * Otherwise the effect is as if XFS_BMAPI_METADATA was set and all the
+ * other flags were clear.  *fsb is NULLFSBLOCK if bno is not mapped.
+ */
+int                                            /* error */
+xfs_bmapi_single(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *ip,            /* incore inode */
+       int             whichfork,      /* data or attr fork */
+       xfs_fsblock_t   *fsb,           /* output: mapped block */
+       xfs_fileoff_t   bno)            /* starting file offs. mapped */
+{
+       xfs_ifork_t     *ifp;           /* fork being searched */
+       xfs_bmbt_irec_t got;            /* extent returned by the search */
+       xfs_bmbt_irec_t prev;           /* extent before got (unused here) */
+       xfs_extnum_t    lastx;          /* extent index from the search */
+       int             eof;            /* search ran off the extent list */
+       int             error;          /* error return */
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+             XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS))
+               return XFS_ERROR(EFSCORRUPTED);
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return XFS_ERROR(EIO);
+       XFS_STATS_INC(xs_blk_mapr);
+       /* XFS_IFEXTENTS clear: the extents have to be read in first. */
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+       (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+       if (!eof && got.br_startoff <= bno) {
+               ASSERT(!ISNULLSTARTBLOCK(got.br_startblock));
+               ASSERT(bno < got.br_startoff + got.br_blockcount);
+               *fsb = got.br_startblock + (bno - got.br_startoff);
+               ifp->if_lastex = lastx;
+               return 0;
+       }
+       /* Past eof or in a hole: act as though there's a hole up to end. */
+       *fsb = NULLFSBLOCK;
+       return 0;
+}
+
+/*
+ * Unmap (remove) blocks from a file, working back from the end of the range.
+ * If nexts is nonzero then the number of extents to remove is limited to
+ * that value.  *done is set nonzero when the whole block range has been
+ * dealt with and zero when the loop stopped early; it may be unset on error.
+ */
+int                                            /* error */
+xfs_bunmapi(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting offset to unmap */
+       xfs_filblks_t           len,            /* length to unmap in file */
+       int                     flags,          /* misc flags */            
+       xfs_extnum_t            nexts,          /* number of extents max */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
+       int                     *done)          /* out: nonzero if range done */
+{
+       int                     async;          /* xactions can be async */
+       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
+       xfs_bmbt_irec_t         del;            /* extent being deleted */
+       int                     eof;            /* is deleting at eof */
+       xfs_bmbt_rec_t          *ep;            /* extent list entry pointer */
+       int                     error;          /* error return value */
+       xfs_extnum_t            extno;          /* extent number in list */
+       xfs_bmbt_irec_t         got;            /* current extent list entry */
+       xfs_ifork_t             *ifp;           /* inode fork pointer */
+       int                     isrt;           /* freeing in rt area */
+       xfs_extnum_t            lastx;          /* last extent index used */
+       int                     logflags;       /* transaction logging flags */
+       xfs_extlen_t            mod;            /* rt extent offset */
+       xfs_mount_t             *mp;            /* mount structure */
+       xfs_extnum_t            nextents;       /* size of extent list */
+       xfs_bmbt_irec_t         prev;           /* previous extent list entry */
+       xfs_fileoff_t           start;          /* first file offset deleted */
+       int                     tmp_logflags;   /* partial logging flags */
+       int                     wasdel;         /* was a delayed alloc extent */
+       int                     whichfork;      /* data or attribute fork */
+       int                     rsvd;           /* OK to allocate reserved blocks */
+       xfs_fsblock_t           sum;            /* startblock + blockcount of del */
+
+       xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address);
+       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+               XFS_ATTR_FORK : XFS_DATA_FORK;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               return XFS_ERROR(EFSCORRUPTED);
+       mp = ip->i_mount;
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+       async = flags & XFS_BMAPI_ASYNC;
+       rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+       ASSERT(len > 0);
+       ASSERT(nexts >= 0);
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *done = 1;
+               return 0;
+       }
+       XFS_STATS_INC(xs_blk_unmap);
+       isrt = (whichfork == XFS_DATA_FORK) &&
+              (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+       start = bno;
+       bno = start + len - 1;  /* last block of range; loop works backwards */
+       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+       /*
+        * Check to see if the given block number is past the end of the
+        * file, back up to the last block if so...
+        */
+       if (eof) {
+               ep = &ifp->if_u1.if_extents[--lastx];
+               xfs_bmbt_get_all(ep, &got);
+               bno = got.br_startoff + got.br_blockcount - 1;
+       }
+       logflags = 0;
+       if (ifp->if_flags & XFS_IFBROOT) {
+               ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+               cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+                       whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else
+               cur = NULL;
+       extno = 0;
+       while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+              (nexts == 0 || extno < nexts)) {
+               /*
+                * Is the found extent after a hole in which bno lives?
+                * Just back up to the previous extent, if so.
+                */
+               if (got.br_startoff > bno) {
+                       if (--lastx < 0)
+                               break;
+                       ep--;
+                       xfs_bmbt_get_all(ep, &got);
+               }
+               /*
+                * Is the last block of this extent before the range
+                * we're supposed to delete?  If so, we're done.
+                */
+               bno = XFS_FILEOFF_MIN(bno,
+                       got.br_startoff + got.br_blockcount - 1);
+               if (bno < start)
+                       break;
+               /*
+                * Then deal with the (possibly delayed) allocated space
+                * we found.
+                */
+               ASSERT(ep != NULL);
+               del = got;
+               wasdel = ISNULLSTARTBLOCK(del.br_startblock);
+               if (got.br_startoff < start) {
+                       del.br_startoff = start;
+                       del.br_blockcount -= start - got.br_startoff;
+                       if (!wasdel)
+                               del.br_startblock += start - got.br_startoff;
+               }
+               if (del.br_startoff + del.br_blockcount > bno + 1)
+                       del.br_blockcount = bno + 1 - del.br_startoff;
+               sum = del.br_startblock + del.br_blockcount;
+               if (isrt &&
+                   (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
+                       /*
+                        * Realtime extent not lined up at the end.
+                        * The extent could have been split into written
+                        * and unwritten pieces, or we could just be
+                        * unmapping part of it.  But we can't really
+                        * get rid of part of a realtime extent.
+                        */
+                       if (del.br_state == XFS_EXT_UNWRITTEN ||
+                           !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+                               /*
+                                * This piece is unwritten, or we're not
+                                * using unwritten extents.  Skip over it.
+                                */
+                               ASSERT(bno >= mod);
+                               bno -= mod > del.br_blockcount ?
+                                       del.br_blockcount : mod;
+                               if (bno < got.br_startoff) {
+                                       if (--lastx >= 0)
+                                               xfs_bmbt_get_all(--ep, &got);
+                               }
+                               continue;
+                       }
+                       /*
+                        * It's written, turn it unwritten.
+                        * This is better than zeroing it.
+                        */
+                       ASSERT(del.br_state == XFS_EXT_NORM);
+                       ASSERT(xfs_trans_get_block_res(tp) > 0);
+                       /*
+                        * If this spans a realtime extent boundary,
+                        * chop it back to the start of the one we end at.
+                        */
+                       if (del.br_blockcount > mod) {
+                               del.br_startoff += del.br_blockcount - mod;
+                               del.br_startblock += del.br_blockcount - mod;
+                               del.br_blockcount = mod;
+                       }
+                       del.br_state = XFS_EXT_UNWRITTEN;
+                       error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
+                               firstblock, flist, &logflags, XFS_DATA_FORK, 0);
+                       if (error)
+                               goto error0;
+                       goto nodelete;
+               }
+               if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
+                       /*
+                        * Realtime extent is lined up at the end but not
+                        * at the front.  We'll get rid of full extents if
+                        * we can.
+                        */
+                       mod = mp->m_sb.sb_rextsize - mod;
+                       if (del.br_blockcount > mod) {
+                               del.br_blockcount -= mod;
+                               del.br_startoff += mod;
+                               del.br_startblock += mod;
+                       } else if ((del.br_startoff == start &&
+                                   (del.br_state == XFS_EXT_UNWRITTEN ||
+                                    xfs_trans_get_block_res(tp) == 0)) ||
+                                  !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+                               /*
+                                * Can't make it unwritten.  There isn't
+                                * a full extent here so just skip it.
+                                */
+                               ASSERT(bno >= del.br_blockcount);
+                               bno -= del.br_blockcount;
+                               if (bno < got.br_startoff) {
+                                       if (--lastx >= 0)
+                                               xfs_bmbt_get_all(--ep, &got);
+                               }
+                               continue;
+                       } else if (del.br_state == XFS_EXT_UNWRITTEN) {
+                               /*
+                                * This one is already unwritten.
+                                * It must have a written left neighbor.
+                                * Unwrite the killed part of that one and
+                                * try again.
+                                */
+                               ASSERT(lastx > 0);
+                               xfs_bmbt_get_all(ep - 1, &prev);
+                               ASSERT(prev.br_state == XFS_EXT_NORM);
+                               ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock));
+                               ASSERT(del.br_startblock ==
+                                      prev.br_startblock + prev.br_blockcount);
+                               if (prev.br_startoff < start) {
+                                       mod = start - prev.br_startoff;
+                                       prev.br_blockcount -= mod;
+                                       prev.br_startblock += mod;
+                                       prev.br_startoff = start;
+                               }
+                               prev.br_state = XFS_EXT_UNWRITTEN;
+                               error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
+                                       &prev, firstblock, flist, &logflags,
+                                       XFS_DATA_FORK, 0);
+                               if (error)
+                                       goto error0;
+                               goto nodelete;
+                       } else {
+                               ASSERT(del.br_state == XFS_EXT_NORM);
+                               del.br_state = XFS_EXT_UNWRITTEN;
+                               error = xfs_bmap_add_extent(ip, lastx, &cur,
+                                       &del, firstblock, flist, &logflags,
+                                       XFS_DATA_FORK, 0);
+                               if (error)
+                                       goto error0;
+                               goto nodelete;
+                       }
+               }
+               if (wasdel) {
+                       ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
+                       xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+                               (int)del.br_blockcount, rsvd);
+                       if (XFS_IS_QUOTA_ON(ip->i_mount)) {
+                               ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+                               ASSERT(ip->i_ino != mp->m_sb.sb_pquotino);
+                               if (!isrt)
+                                       xfs_trans_unreserve_blkquota(NULL, ip, 
+                                             (long)del.br_blockcount);
+                               else
+                                       xfs_trans_unreserve_rtblkquota(NULL, ip,
+                                             (long)del.br_blockcount);
+                       }
+                       ip->i_delayed_blks -= del.br_blockcount;
+                       if (cur)
+                               cur->bc_private.b.flags |=
+                                       XFS_BTCUR_BPRV_WASDEL;
+               } else if (cur)
+                       cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
+               /*
+                * If it's the case where the directory code is running
+                * with no block reservation, and the deleted block is in
+                * the middle of its extent, and the resulting insert
+                * of an extent would cause transformation to btree format,
+                * then reject it.  The calling code will then swap
+                * blocks around instead.
+                * We have to do this now, rather than waiting for the
+                * conversion to btree format, since the transaction
+                * will be dirty.
+                */
+               if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
+                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+                   XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
+                   del.br_startoff > got.br_startoff &&
+                   del.br_startoff + del.br_blockcount < 
+                   got.br_startoff + got.br_blockcount) {
+                       error = XFS_ERROR(ENOSPC);
+                       goto error0;
+               }
+               error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
+                       flags, &tmp_logflags, whichfork, rsvd);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+               bno = del.br_startoff - 1;      /* resume just below del */
+nodelete:
+               lastx = ifp->if_lastex;
+               /*
+                * If not done go on to the next (previous) record.
+                * Reset ep in case the extents array was re-alloced.
+                */
+               ep = &ifp->if_u1.if_extents[lastx];
+               if (bno != (xfs_fileoff_t)-1 && bno >= start) {
+                       if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
+                           xfs_bmbt_get_startoff(ep) > bno) {
+                               lastx--;
+                               ep--;
+                       }
+                       if (lastx >= 0)
+                               xfs_bmbt_get_all(ep, &got);
+                       extno++;
+               }
+       }
+       ifp->if_lastex = lastx;
+       *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       /*
+        * Convert to a btree if necessary.
+        */
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
+                       &cur, 0, &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+       /*
+        * transform from btree to extents, give it cur
+        */
+       else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+                XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+               ASSERT(cur != NULL);
+               error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+                       whichfork, async);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+       /*
+        * transform from extents to local?  (no such conversion is done here)
+        */
+       ASSERT(ifp->if_ext_max ==
+              XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+       error = 0;
+error0:
+       /*
+        * Log everything.  Do this after conversion, there's no point in
+        * logging the extent list if we've converted to btree format.
+        */
+       if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               logflags &= ~XFS_ILOG_FEXT(whichfork);
+       else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
+                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               logflags &= ~XFS_ILOG_FBROOT(whichfork);
+       /*
+        * Log inode even in the error case, if the transaction
+        * is dirty we'll need to shut down the filesystem.
+        */
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+       if (cur) {
+               if (!error) {
+                       *firstblock = cur->bc_private.b.firstblock;
+                       cur->bc_private.b.allocated = 0;
+               }
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+       return error;
+}
+
+/*
+ * Look at the last extent of the inode to decide whether an allocation at
+ * file offset off would place blocks at the end of the file.  New data blocks
+ * allocated at end of file that do not start at the previous data block are
+ * the ones we will try to align on stripe unit boundaries.
+ */
+int                                    /* error */
+xfs_bmap_isaeof(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_fileoff_t   off,            /* file offset in fsblocks */
+       int             whichfork,      /* data or attribute fork */
+       int             *aeof)          /* return value */
+{
+       xfs_bmbt_irec_t last;           /* last extent, expanded */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       xfs_extnum_t    nextents;       /* size of extent list */
+       int             error;          /* error return value */
+
+       ASSERT(whichfork == XFS_DATA_FORK);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       /* XFS_IFEXTENTS clear: the extents have to be read in first. */
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, whichfork);
+               if (error)
+                       return error;
+       }
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *aeof = 1;
+               return 0;
+       }
+       /* Expand the last entry of the extent list. */
+       xfs_bmbt_get_all(&ifp->if_u1.if_extents[nextents - 1], &last);
+       /*
+        * At eof if off is past the last extent (non-delayed allocations),
+        * or inside the last extent and that extent is a delayed allocation.
+        */
+       if (off >= last.br_startoff + last.br_blockcount)
+               *aeof = 1;
+       else
+               *aeof = off >= last.br_startoff &&
+                       ISNULLSTARTBLOCK(last.br_startblock);
+       return 0;
+}
diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c
new file mode 100644 (file)
index 0000000..a221892
--- /dev/null
@@ -0,0 +1,2528 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Delete record pointed to by cur/level.
+ */
+STATIC int                                     /* error */
+xfs_bmbt_delrec(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       int                     async,          /* deletion can be async */
+       int                     *stat)          /* success/failure */
+{
+       xfs_bmbt_block_t        *block;         /* bmap btree block */
+       xfs_fsblock_t           bno;            /* fs-relative block number */
+       xfs_buf_t                       *bp;            /* buffer for block */
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_delrec";
+#endif
+       int                     i;              /* loop counter */
+       int                     j;              /* temp state */
+       xfs_bmbt_key_t          key;            /* bmap btree key */
+       xfs_bmbt_key_t          *kp;            /* pointer to bmap btree key */
+       xfs_fsblock_t           lbno;           /* left sibling block number */
+       xfs_buf_t                       *lbp;           /* left buffer pointer */
+       xfs_bmbt_block_t        *left;          /* left btree block */
+       xfs_bmbt_key_t          *lkp;           /* left btree key */
+       xfs_bmbt_ptr_t          *lpp;           /* left address pointer */
+       int                     lrecs;          /* left record count */
+       xfs_bmbt_rec_t          *lrp;           /* left record pointer */
+       xfs_mount_t             *mp;            /* file system mount point */
+       xfs_bmbt_ptr_t          *pp;            /* pointer to bmap block addr */
+       int                     ptr;            /* key/record index */
+       xfs_fsblock_t           rbno;           /* right sibling block number */
+       xfs_buf_t                       *rbp;           /* right buffer pointer */
+       xfs_bmbt_block_t        *right;         /* right btree block */
+       xfs_bmbt_key_t          *rkp;           /* right btree key */
+       xfs_bmbt_rec_t          *rp;            /* pointer to bmap btree rec */
+       xfs_bmbt_ptr_t          *rpp;           /* right address pointer */
+       xfs_bmbt_block_t        *rrblock;       /* right-right btree block */
+       xfs_buf_t                       *rrbp;          /* right-right buffer pointer */
+       int                     rrecs;          /* right record count */
+       xfs_bmbt_rec_t          *rrp;           /* right record pointer */
+       xfs_btree_cur_t         *tcur;          /* temporary btree cursor */
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, level);
+       ptr = cur->bc_ptrs[level];
+       tcur = (xfs_btree_cur_t *)0;
+       if (ptr == 0) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               goto error0;
+       }
+#endif
+       if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       XFS_STATS_INC(xs_bmbt_delrec);
+       if (level > 0) {
+               kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+               pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+#ifdef DEBUG
+               for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               goto error0;
+                       }
+               }
+#endif
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+                       ovbcopy(&kp[ptr], &kp[ptr - 1],
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp));
+                       ovbcopy(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp));
+                       xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+                       xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+               }
+       } else {
+               rp = XFS_BMAP_REC_IADDR(block, 1, cur);
+               if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+                       ovbcopy(&rp[ptr], &rp[ptr - 1],
+                               (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp));
+                       xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
+               }
+               if (ptr == 1) {
+                       INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rp));
+                       kp = &key;
+               }
+       }
+       INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
+       /*
+        * We're at the root level.
+        * First, shrink the root block in-memory.
+        * Try to get rid of the next level down.
+        * If we can't then there's nothing left to do.
+        */
+       if (level == cur->bc_nlevels - 1) {
+               xfs_iroot_realloc(cur->bc_private.b.ip, -1,
+                       cur->bc_private.b.whichfork);
+               if (error = xfs_bmbt_killroot(cur, async)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               goto error0;
+       }
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+               if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+       lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+       /*
+        * One child of root, need to get a chance to copy its contents
+        * into the root and delete it. Can't go up to next level,
+        * there's nothing to delete there.
+        */
+       if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
+           level == cur->bc_nlevels - 2) {
+               if (error = xfs_bmbt_killroot(cur, async)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
+       if (error = xfs_btree_dup_cursor(cur, &tcur)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               goto error0;
+       }
+       bno = NULLFSBLOCK;
+       if (rbno != NULLFSBLOCK) {
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (error = xfs_bmbt_increment(tcur, level, &i)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               rbp = tcur->bc_bufs[level];
+               right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+#endif
+               bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+               if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
+                   XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+                       if (error = xfs_bmbt_lshift(tcur, level, &i)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               goto error0;
+                       }
+                       if (i) {
+                               ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+                                      XFS_BMAP_BLOCK_IMINRECS(level, tcur));
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                               tcur = NULL;
+                               if (level > 0) {
+                                       if (error = xfs_bmbt_decrement(cur,
+                                                       level, &i)) {
+                                               XFS_BMBT_TRACE_CURSOR(cur,
+                                                       ERROR);
+                                               goto error0;
+                                       }
+                               }
+                               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+                               *stat = 1;
+                               return 0;
+                       }
+               }
+               rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
+               if (lbno != NULLFSBLOCK) {
+                       i = xfs_btree_firstrec(tcur, level);
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (error = xfs_bmbt_decrement(tcur, level, &i)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               goto error0;
+                       }
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               }
+       }
+       if (lbno != NULLFSBLOCK) {
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * decrement to last in block
+                */
+               if (error = xfs_bmbt_decrement(tcur, level, &i)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               lbp = tcur->bc_bufs[level];
+               left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+#endif
+               bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+               if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
+                   XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
+                       if (error = xfs_bmbt_rshift(tcur, level, &i)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               goto error0;
+                       }
+                       if (i) {
+                               ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
+                                      XFS_BMAP_BLOCK_IMINRECS(level, tcur));
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                               tcur = NULL;
+                               if (level == 0)
+                                       cur->bc_ptrs[0]++;
+                               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+                               *stat = 1;
+                               return 0;
+                       }
+               }
+               lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       tcur = NULL;
+       mp = cur->bc_mp;
+       ASSERT(bno != NULLFSBLOCK);
+       if (lbno != NULLFSBLOCK &&
+           lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+               rbno = bno;
+               right = block;
+               rbp = bp;
+               if (error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+               if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+       } else if (rbno != NULLFSBLOCK &&
+                  rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+                  XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+               lbno = bno;
+               left = block;
+               lbp = bp;
+               if (error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+               if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       } else {
+               if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       if (level > 0) {
+               lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+               rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               goto error0;
+                       }
+               }
+#endif
+               bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
+               bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
+               xfs_bmbt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                       INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_bmbt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                       INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       } else {
+               lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+               bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
+               xfs_bmbt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
+                       INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       }
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
+       xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+               if (error = xfs_btree_read_bufl(mp, cur->bc_tp,
+                               INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
+               if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       goto error0;
+               }
+               INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
+               xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+       }
+       xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
+               cur->bc_private.b.flist, mp);
+       if (!async)
+               xfs_trans_set_sync(cur->bc_tp);
+       cur->bc_private.b.ip->i_d.di_nblocks--;
+       xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+       if (XFS_IS_QUOTA_ON(mp) &&
+           cur->bc_private.b.ip->i_ino != mp->m_sb.sb_uquotino &&
+           cur->bc_private.b.ip->i_ino != mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(cur->bc_tp, cur->bc_private.b.ip, 
+                       XFS_TRANS_DQ_BCOUNT, -1L);
+       xfs_trans_binval(cur->bc_tp, rbp);
+       if (bp != lbp) {
+               cur->bc_bufs[level] = lbp;
+               cur->bc_ptrs[level] += lrecs;
+               cur->bc_ra[level] = 0;
+       } else if (error = xfs_bmbt_increment(cur, level + 1, &i)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               goto error0;
+       }
+       if (level > 0)
+               cur->bc_ptrs[level]--;
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 2;
+       return 0;
+
+error0:
+       if (tcur)
+               xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ *
+ * At level 0 the record *recp is inserted; at higher levels a key built
+ * from recp's startoff and the block pointer *bnop are inserted.  The
+ * slot used is cur->bc_ptrs[level] (1-based).  When the block is already
+ * at XFS_BMAP_BLOCK_IMAXRECS, room is made first: the root is grown with
+ * xfs_iroot_realloc, or a new root is added above it, or (non-root) one
+ * entry is shifted right, then left, and if neither works the block is
+ * split.
+ *
+ * On a successful insert *bnop is set to the block number returned by a
+ * split, or NULLFSBLOCK when no split happened.  After a split *recp is
+ * overwritten with a record built from the split's key and *curp with
+ * the cursor the split returned, so the caller can insert one level up.
+ * *stat is set to 0 when nothing was inserted and to 1 when the insert
+ * was done.  Returns 0 or the error from a called routine.
+ *
+ * NOTE(review): nkey.br_startoff is handed to xfs_bmbt_set_allf without
+ * INT_GET, while key.br_startoff above is stored with INT_SET - confirm
+ * the byte order xfs_bmbt_split leaves in nkey.
+ */
+STATIC int                                     /* error */
+xfs_bmbt_insrec(
+       xfs_btree_cur_t         *cur,           /* btree cursor */
+       int                     level,          /* level to insert at */
+       xfs_fsblock_t           *bnop,          /* in/out: block number */
+       xfs_bmbt_rec_t          *recp,          /* in/out: record */
+       xfs_btree_cur_t         **curp,         /* out: cursor from a split */
+       int                     *stat)          /* no-go/done/continue */
+{
+       xfs_bmbt_block_t        *block;         /* bmap btree block */
+       xfs_buf_t                       *bp;            /* buffer for block */
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_insrec";
+#endif
+       int                     i;              /* loop index */
+       xfs_bmbt_key_t          key;            /* bmap btree key */
+       xfs_bmbt_key_t          *kp;            /* pointer to bmap btree key */
+       int                     logflags;       /* inode logging flags */
+       xfs_fsblock_t           nbno;           /* new block number */
+       struct xfs_btree_cur    *ncur;          /* new btree cursor */
+       xfs_bmbt_key_t          nkey;           /* new btree key value */
+       xfs_bmbt_rec_t          nrec;           /* new record, built from nkey */
+       int                     optr;           /* old key/record index */
+       xfs_bmbt_ptr_t          *pp;            /* pointer to bmap block addr */
+       int                     ptr;            /* key/record index */
+       xfs_bmbt_rec_t          *rp;            /* pointer to bmap btree rec */
+
+       ASSERT(level < cur->bc_nlevels);
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
+       ncur = (xfs_btree_cur_t *)0;            /* no split cursor yet */
+       INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(recp));
+       optr = ptr = cur->bc_ptrs[level];
+       if (ptr == 0) {                         /* no slot to insert at: no-go */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       XFS_STATS_INC(xs_bmbt_insrec);
+       block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               if (level == 0) {
+                       rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
+                       xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
+               } else {
+                       kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
+                       xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
+               }
+       }
+#endif
+       nbno = NULLFSBLOCK;                     /* no split so far */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+                       /*
+                        * A root block, that can be made bigger.
+                        */
+                       xfs_iroot_realloc(cur->bc_private.b.ip, 1,
+                               cur->bc_private.b.whichfork);
+                       block = xfs_bmbt_get_block(cur, level, &bp);
+               } else if (level == cur->bc_nlevels - 1) { /* top level: add a root */
+                       if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
+                           *stat == 0) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+                       xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                               logflags);
+                       block = xfs_bmbt_get_block(cur, level, &bp);
+               } else {                        /* lower level: shift or split */
+                       if (error = xfs_bmbt_rshift(cur, level, &i)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+                       if (i) {
+                               /* nothing */
+                       } else {
+                               if (error = xfs_bmbt_lshift(cur, level, &i)) {
+                                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                                       return error;
+                               }
+                               if (i) {        /* re-read slot after left shift */
+                                       optr = ptr = cur->bc_ptrs[level];
+                               } else {
+                                       if (error = xfs_bmbt_split(cur, level,
+                                                       &nbno, &nkey, &ncur,
+                                                       &i)) {
+                                               XFS_BMBT_TRACE_CURSOR(cur,
+                                                       ERROR);
+                                               return error;
+                                       }
+                                       if (i) {
+                                               block = xfs_bmbt_get_block(
+                                                           cur, level, &bp);
+#ifdef DEBUG
+                                               if (error =
+                                                   xfs_btree_check_lblock(cur,
+                                                           block, level, bp)) {
+                                                       XFS_BMBT_TRACE_CURSOR(
+                                                               cur, ERROR);
+                                                       return error;
+                                               }
+#endif
+                                               ptr = cur->bc_ptrs[level]; /* optr is not updated here */
+                                               xfs_bmbt_set_allf(&nrec,
+                                                       nkey.br_startoff, 0, 0,
+                                                       XFS_EXT_NORM);
+                                       } else {
+                                               XFS_BMBT_TRACE_CURSOR(cur,
+                                                       EXIT);
+                                               *stat = 0;
+                                               return 0;
+                                       }
+                               }
+                       }
+               }
+       }
+       if (level > 0) {                        /* interior: key + pointer */
+               kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+               pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT),
+                                       level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+               }
+#endif
+               ovbcopy(&kp[ptr - 1], &kp[ptr], /* move entries up to open slot ptr */
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
+               ovbcopy(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+#ifdef DEBUG
+               if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
+                               level)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               kp[ptr - 1] = key;
+               INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+               xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+       } else {                                /* leaf: whole record */
+               rp = XFS_BMAP_REC_IADDR(block, 1, cur);
+               ovbcopy(&rp[ptr - 1], &rp[ptr], /* move records up to open slot ptr */
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+               rp[ptr - 1] = *recp;
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+       }
+       xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+       if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               if (level == 0)
+                       xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
+                               rp + ptr);
+               else
+                       xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
+                               kp + ptr);
+       }
+#endif
+       if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) { /* slot 1: update key above */
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       *bnop = nbno;                           /* NULLFSBLOCK unless split */
+       if (nbno != NULLFSBLOCK) {
+               *recp = nrec;
+               *curp = ncur;
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+}
+
+STATIC int
+xfs_bmbt_killroot(
+       xfs_btree_cur_t         *cur,
+       int                     async)
+{
+       xfs_bmbt_block_t        *block;
+       xfs_bmbt_block_t        *cblock;
+       xfs_buf_t                       *cbp;
+       xfs_bmbt_key_t          *ckp;
+       xfs_bmbt_ptr_t          *cpp;
+#ifdef DEBUG
+       int                     error;
+#endif
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_killroot";
+#endif
+       int                     i;
+       xfs_bmbt_key_t          *kp;
+       xfs_inode_t             *ip;
+       xfs_ifork_t             *ifp;
+       int                     level;
+       xfs_bmbt_ptr_t          *pp;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       level = cur->bc_nlevels - 1;
+       ASSERT(level >= 1);
+       /*
+        * Don't deal with the root block needs to be a leaf case.
+        * We're just going to turn the thing back into extents anyway.
+        */
+       if (level == 1) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               return 0;
+       }
+       block = xfs_bmbt_get_block(cur, level, &cbp);
+       /*
+        * Give up if the root has multiple children.
+        */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) != 1) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               return 0;
+       }
+       /*
+        * Only do this if the next level will fit.
+        * Then the data must be copied up to the inode,
+        * instead of freeing the root you free the next level.
+        */
+       cbp = cur->bc_bufs[level - 1];
+       cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
+       if (INT_GET(cblock->bb_numrecs, ARCH_CONVERT) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               return 0;
+       }
+       ASSERT(INT_GET(cblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+       ASSERT(INT_GET(cblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+       ip = cur->bc_private.b.ip;
+       ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
+       ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
+              XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
+       i = (int)(INT_GET(cblock->bb_numrecs, ARCH_CONVERT) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
+       if (i) {
+               xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
+               block = ifp->if_broot;
+       }
+       INT_MOD(block->bb_numrecs, ARCH_CONVERT, i);
+       ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+       kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+       ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+       bcopy(ckp, kp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+       pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+       cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+       for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+               if (error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+       }
+#endif
+       bcopy(cpp, pp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+       xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
+               cur->bc_private.b.flist, cur->bc_mp);
+       if (!async)
+               xfs_trans_set_sync(cur->bc_tp);
+       ip->i_d.di_nblocks--;
+       if (XFS_IS_QUOTA_ON(cur->bc_mp) &&
+           ip->i_ino != cur->bc_mp->m_sb.sb_uquotino &&
+           ip->i_ino != cur->bc_mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(cur->bc_tp, ip, XFS_TRANS_DQ_BCOUNT,
+                       -1L);
+       xfs_trans_binval(cur->bc_tp, cbp);
+       cur->bc_bufs[level - 1] = NULL;
+       INT_MOD(block->bb_level, ARCH_CONVERT, -1);
+       xfs_trans_log_inode(cur->bc_tp, ip,
+               XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+       cur->bc_nlevels--;
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       return 0;
+}
+
+/*
+ * Log a range of keys from a bmap btree block.
+ *
+ * kfirst and klast are 1-based, inclusive key indices.  With a buffer,
+ * the bytes from the start of key kfirst through the last byte of key
+ * klast are logged against that buffer.  Without one (bp is null) the
+ * cursor's inode is logged with the broot flag for the cursor's fork.
+ */
+STATIC void
+xfs_bmbt_log_keys(
+       xfs_btree_cur_t *cur,
+       xfs_buf_t               *bp,
+       int             kfirst,
+       int             klast)
+{
+#ifdef XFS_BMBT_TRACE
+       static char     fname[] = "xfs_bmbt_log_keys";
+#endif
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
+       if (!bp) {
+               /* No buffer to log against: log the inode's broot. */
+               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                       XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+       } else {
+               xfs_bmbt_block_t        *block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               xfs_bmbt_key_t          *keys = XFS_BMAP_KEY_DADDR(block, 1, cur);
+               xfs_caddr_t             base = (xfs_caddr_t)block;
+               int                     first_byte;
+               int                     last_byte;
+
+               /* Byte offsets of the key range within the block. */
+               first_byte = (int)((xfs_caddr_t)(keys + (kfirst - 1)) - base);
+               last_byte = (int)(((xfs_caddr_t)(keys + klast) - 1) - base);
+               xfs_trans_log_buf(cur->bc_tp, bp, first_byte, last_byte);
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log a range of block pointers from a bmap btree block.
+ *
+ * pfirst and plast are 1-based, inclusive pointer indices.  With a
+ * buffer, the bytes from the start of pointer pfirst through the last
+ * byte of pointer plast are logged against that buffer.  Without one
+ * (bp is null) the cursor's inode is logged with the broot flag for the
+ * cursor's fork.
+ */
+STATIC void
+xfs_bmbt_log_ptrs(
+       xfs_btree_cur_t *cur,
+       xfs_buf_t               *bp,
+       int             pfirst,
+       int             plast)
+{
+#ifdef XFS_BMBT_TRACE
+       static char     fname[] = "xfs_bmbt_log_ptrs";
+#endif
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
+       if (!bp) {
+               /* No buffer to log against: log the inode's broot. */
+               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                       XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+       } else {
+               xfs_bmbt_block_t        *block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               xfs_bmbt_ptr_t          *ptrs = XFS_BMAP_PTR_DADDR(block, 1, cur);
+               xfs_caddr_t             base = (xfs_caddr_t)block;
+               int                     first_byte;
+               int                     last_byte;
+
+               /* Byte offsets of the pointer range within the block. */
+               first_byte = (int)((xfs_caddr_t)(ptrs + (pfirst - 1)) - base);
+               last_byte = (int)(((xfs_caddr_t)(ptrs + plast) - 1) - base);
+               xfs_trans_log_buf(cur->bc_tp, bp, first_byte, last_byte);
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ *
+ * The search value is the startoff in cur->bc_rec.b.  Levels are visited
+ * from cur->bc_nlevels - 1 down to 0.  Below the top level the block is
+ * taken from cur->bc_bufs[level] when that buffer's address matches the
+ * block being sought, and read in (and stored in the cursor) otherwise.
+ * Each block is binary-searched on startoff, unless the level above
+ * matched exactly, in which case entry 1 is taken.  cur->bc_ptrs[] is
+ * set at every level visited.
+ *
+ * *stat is set to 0 when the leaf is empty or the final index falls
+ * outside the leaf; otherwise it is 1 for LE/GE lookups and, for an EQ
+ * lookup, 1 only on an exact startoff match.  Returns 0 or the error
+ * from a called routine.
+ */
+STATIC int                             /* error */
+xfs_bmbt_lookup(
+       xfs_btree_cur_t         *cur,           /* btree cursor */
+       xfs_lookup_t            dir,            /* XFS_LOOKUP_LE/GE/EQ */
+       int                     *stat)          /* success/failure */
+{
+       xfs_bmbt_block_t        *block;         /* block at current level */
+       xfs_buf_t                       *bp;            /* buffer for block */
+       xfs_daddr_t                     d;              /* disk address of fsbno */
+       xfs_sfiloff_t           diff;           /* entry startoff - wanted */
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char     fname[] = "xfs_bmbt_lookup";
+#endif
+       xfs_fsblock_t           fsbno;          /* block to visit next level down */
+       int                     high;           /* binary search upper index */
+       int                     i;              /* result of increment */
+       int                     keyno;          /* current entry index, 1-based */
+       xfs_bmbt_key_t          *kkbase;        /* first key in block */
+       xfs_bmbt_key_t          *kkp;           /* key being compared */
+       xfs_bmbt_rec_t          *krbase;        /* first record in block */
+       xfs_bmbt_rec_t          *krp;           /* record being compared */
+       int                     level;          /* current btree level */
+       int                     low;            /* binary search lower index */
+       xfs_mount_t             *mp;            /* cursor's mount */
+       xfs_bmbt_ptr_t          *pp;            /* pointer chosen at this level */
+       xfs_bmbt_irec_t         *rp;            /* search value, cur->bc_rec.b */
+       xfs_fileoff_t           startoff;       /* startoff of compared entry */
+       xfs_trans_t             *tp;            /* cursor's transaction */
+
+       XFS_STATS_INC(xs_bmbt_lookup);
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, (int)dir);
+       tp = cur->bc_tp;
+       mp = cur->bc_mp;
+       rp = &cur->bc_rec.b;
+       for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+               if (level < cur->bc_nlevels - 1) {      /* below the top level */
+                       d = XFS_FSB_TO_DADDR(mp, fsbno);
+                       bp = cur->bc_bufs[level];
+                       if (bp && XFS_BUF_ADDR(bp) != d) /* held buffer is another block */
+                               bp = (xfs_buf_t *)0;
+                       if (!bp) {
+                               if (error = xfs_btree_read_bufl(mp, tp, fsbno,
+                                               0, &bp, XFS_BMAP_BTREE_REF)) {
+                                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                                       return error;
+                               }
+                               xfs_btree_setbuf(cur, level, bp);
+                               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+                               if (error = xfs_btree_check_lblock(cur, block,
+                                               level, bp)) {
+                                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                                       return error;
+                               }
+                       } else
+                               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               } else                                  /* top level */
+                       block = xfs_bmbt_get_block(cur, level, &bp);
+               if (diff == 0)          /* exact match one level up */
+                       keyno = 1;
+               else {
+                       if (level > 0)
+                               kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
+                       else
+                               krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
+                       low = 1;
+                       if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) { /* empty block */
+                               ASSERT(level == 0);
+                               cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; /* 0 for LE, else 1 */
+                               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+                               *stat = 0;
+                               return 0;
+                       }
+                       while (low <= high) {   /* binary search on startoff */
+                               XFS_STATS_INC(xs_bmbt_compare);
+                               keyno = (low + high) >> 1;
+                               if (level > 0) {
+                                       kkp = kkbase + keyno - 1;
+                                       startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
+                               } else {
+                                       krp = krbase + keyno - 1;
+                                       startoff = xfs_bmbt_get_startoff(krp);
+                               }
+                               diff = (xfs_sfiloff_t)
+                                               (startoff - rp->br_startoff);
+                               if (diff < 0)
+                                       low = keyno + 1;
+                               else if (diff > 0)
+                                       high = keyno - 1;
+                               else
+                                       break;
+                       }
+               }
+               if (level > 0) {
+                       if (diff > 0 && --keyno < 1) /* entry too big: step back, min 1 */
+                               keyno = 1;
+                       pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
+#ifdef DEBUG
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+#endif
+                       fsbno = INT_GET(*pp, ARCH_CONVERT);
+                       cur->bc_ptrs[level] = keyno;
+               }
+       }
+       if (dir != XFS_LOOKUP_LE && diff < 0) { /* last compared entry too small */
+               keyno++;
+               /*
+                * If ge search and we went off the end of the block, but it's
+                * not the last block, we're in the wrong block.
+                */
+               if (dir == XFS_LOOKUP_GE && keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+                   INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+                       cur->bc_ptrs[0] = keyno;
+                       if (error = xfs_bmbt_increment(cur, 0, &i)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+                       *stat = 1;
+                       return 0;
+               }
+       }
+       else if (dir == XFS_LOOKUP_LE && diff > 0) /* last compared entry too big */
+               keyno--;
+       cur->bc_ptrs[0] = keyno;
+       if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { /* index outside the leaf */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+       } else {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+       }
+       return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * The block the cursor holds at this level is treated as the "right"
+ * block.  Its first entry (key and pointer when level > 0, record when
+ * level is 0) is copied to the slot after the last entry of the left
+ * sibling, and the remaining entries of the right block are moved down
+ * one slot with ovbcopy.  Both blocks are logged, the parent key is
+ * rewritten through xfs_bmbt_updkey at level + 1, and the cursor pointer
+ * for this level is decremented.
+ *
+ * *stat is set to 0 and nothing is modified when level is the last cursor
+ * level (bc_nlevels - 1), when the right block has no left sibling, when
+ * the cursor pointer at this level is <= 1, or when the left sibling
+ * already holds XFS_BMAP_BLOCK_IMAXRECS entries.  *stat is set to 1 after
+ * a completed shift.
+ *
+ * Returns 0, or the error code of the helper call that failed.
+ */
+STATIC int                                     /* error */
+xfs_bmbt_lshift(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_lshift";
+#endif
+#ifdef DEBUG
+       int                     i;              /* loop counter */
+#endif
+       xfs_bmbt_key_t          key;            /* bmap btree key */
+       xfs_buf_t                       *lbp;           /* left buffer pointer */
+       xfs_bmbt_block_t        *left;          /* left btree block */
+       xfs_bmbt_key_t          *lkp;           /* left btree key */
+       xfs_bmbt_ptr_t          *lpp;           /* left address pointer */
+       int                     lrecs;          /* left record count */
+       xfs_bmbt_rec_t          *lrp;           /* left record pointer */
+       xfs_mount_t             *mp;            /* file system mount point */
+       xfs_buf_t                       *rbp;           /* right buffer pointer */
+       xfs_bmbt_block_t        *right;         /* right btree block */
+       xfs_bmbt_key_t          *rkp;           /* right btree key */
+       xfs_bmbt_ptr_t          *rpp;           /* right address pointer */
+       xfs_bmbt_rec_t          *rrp;           /* right record pointer */
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, level);
+       if (level == cur->bc_nlevels - 1) {     /* last cursor level: no shift */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       rbp = cur->bc_bufs[level];
+       right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {   /* no left sibling */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       if (cur->bc_ptrs[level] <= 1) { /* cursor is on the entry that would move */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       mp = cur->bc_mp;
+       if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0,
+                       &lbp, XFS_BMAP_BTREE_REF)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+       if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {    /* checked in all builds */
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;    /* slot the moved entry lands in */
+       if (level > 0) {
+               lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+               *lkp = *rkp;
+               xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
+               lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+               if (error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               *lpp = *rpp; /* INT_: direct copy */
+               xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
+       } else {
+               lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+               rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+               *lrp = *rrp;
+               xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
+       }
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+       xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+       if (level > 0)
+               xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
+       else
+               xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
+#endif
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+       if (level > 0) {
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+                                       level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+               }
+#endif
+               ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+               xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       } else {
+               ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+               rkp = &key;     /* key built from right's new first record */
+       }
+       if (error = xfs_bmbt_updkey(cur, rkp, level + 1)) {     /* rkp: right's new first key */
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       cur->bc_ptrs[level]--;  /* entries in right moved down one slot */
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * The block the cursor holds at this level is treated as the "left"
+ * block.  The entries of the right sibling are moved up one slot with
+ * ovbcopy and the last entry of the left block (key and pointer when
+ * level > 0, record when level is 0) is copied into slot 1 of the right
+ * sibling.  Both blocks are logged.  A duplicate cursor is then moved to
+ * the last record at this level (xfs_btree_lastrec), incremented once,
+ * and used to rewrite the parent key through xfs_bmbt_updkey at
+ * level + 1; the duplicate is deleted before returning.
+ *
+ * *stat is set to 0 and nothing is modified when level is the last cursor
+ * level (bc_nlevels - 1), when the left block has no right sibling, when
+ * the cursor pointer at this level is >= the left block's record count,
+ * or when the right sibling already holds XFS_BMAP_BLOCK_IMAXRECS
+ * entries.  *stat is set to 1 after a completed shift.
+ *
+ * Returns 0, or the error code of the helper call that failed.
+ */
+STATIC int                                     /* error */
+xfs_bmbt_rshift(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_rshift";
+#endif
+       int                     i;              /* loop counter */
+       xfs_bmbt_key_t          key;            /* bmap btree key */
+       xfs_buf_t                       *lbp;           /* left buffer pointer */
+       xfs_bmbt_block_t        *left;          /* left btree block */
+       xfs_bmbt_key_t          *lkp;           /* left btree key */
+       xfs_bmbt_ptr_t          *lpp;           /* left address pointer */
+       xfs_bmbt_rec_t          *lrp;           /* left record pointer */
+       xfs_mount_t             *mp;            /* file system mount point */
+       xfs_buf_t                       *rbp;           /* right buffer pointer */
+       xfs_bmbt_block_t        *right;         /* right btree block */
+       xfs_bmbt_key_t          *rkp;           /* right btree key */
+       xfs_bmbt_ptr_t          *rpp;           /* right address pointer */
+       xfs_bmbt_rec_t          *rrp;           /* right record pointer */
+       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, level);
+       if (level == cur->bc_nlevels - 1) {     /* last cursor level: no shift */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       lbp = cur->bc_bufs[level];
+       left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {   /* no right sibling */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       mp = cur->bc_mp;
+       if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+                       &rbp, XFS_BMAP_BTREE_REF)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+       if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {   /* checked in all builds */
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       if (level > 0) {
+               lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+               rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+               }
+#endif
+               ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+               if (error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               *rkp = *lkp;
+               *rpp = *lpp; /* INT_: direct copy */
+               xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+       } else {
+               lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+               ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               *rrp = *lrp;
+               xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+               rkp = &key;     /* key built from right's new first record */
+       }
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+#ifdef DEBUG
+       if (level > 0)
+               xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
+       else
+               xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
+#endif
+       xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+       if (error = xfs_btree_dup_cursor(cur, &tcur)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       i = xfs_btree_lastrec(tcur, level);
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       if (error = xfs_bmbt_increment(tcur, level, &i)) {
+               XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+               goto error1;
+       }
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       if (error = xfs_bmbt_updkey(tcur, rkp, level + 1)) {    /* rkp: right's new first key */
+               XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+               goto error1;
+       }
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+error0:        /* traces the error on cur, then falls through to error1 */
+       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+error1:        /* drops the temporary cursor and returns error */
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Determine the extent state.
+ * A nonzero extent_flag yields XFS_EXT_UNWRITTEN, anything else
+ * XFS_EXT_NORM.  blks is only looked at by the ASSERT.
+ */
+/* ARGSUSED */
+STATIC xfs_exntst_t
+xfs_extent_state(
+       xfs_filblks_t           blks,
+       int                     extent_flag)
+{
+       if (!extent_flag)
+               return XFS_EXT_NORM;
+       ASSERT(blks != 0);      /* saved for DMIG */
+       return XFS_EXT_UNWRITTEN;
+}
+
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ */
+STATIC int                                     /* error */
+xfs_bmbt_split(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       xfs_fsblock_t           *bnop,
+       xfs_bmbt_key_t          *keyp,
+       xfs_btree_cur_t         **curp,
+       int                     *stat)          /* success/failure */
+{
+       xfs_alloc_arg_t         args;           /* block allocation args */
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_split";
+#endif
+       int                     i;              /* loop counter */
+       xfs_fsblock_t           lbno;           /* left sibling block number */
+       xfs_buf_t                       *lbp;           /* left buffer pointer */
+       xfs_bmbt_block_t        *left;          /* left btree block */
+       xfs_bmbt_key_t          *lkp;           /* left btree key */
+       xfs_bmbt_ptr_t          *lpp;           /* left address pointer */
+       xfs_bmbt_rec_t          *lrp;           /* left record pointer */
+       xfs_buf_t                       *rbp;           /* right buffer pointer */
+       xfs_bmbt_block_t        *right;         /* right btree block */
+       xfs_bmbt_key_t          *rkp;           /* right btree key */
+       xfs_bmbt_ptr_t          *rpp;           /* right address pointer */
+       xfs_bmbt_block_t        *rrblock;       /* right-right btree block */
+       xfs_buf_t                       *rrbp;          /* right-right buffer pointer */
+       xfs_bmbt_rec_t          *rrp;           /* right record pointer */
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp);
+       args.tp = cur->bc_tp;
+       args.mp = cur->bc_mp;
+       lbp = cur->bc_bufs[level];
+       lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
+       left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+       args.fsbno = cur->bc_private.b.firstblock;
+       if (args.fsbno == NULLFSBLOCK) {
+               args.fsbno = lbno;
+               args.type = XFS_ALLOCTYPE_START_BNO;
+       } else if (cur->bc_private.b.flist->xbf_low)
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
+       else
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       args.mod = args.minleft = args.alignment = args.total = args.isfl =
+               args.userdata = args.minalignslop = 0;
+       args.minlen = args.maxlen = args.prod = 1;
+       args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+       if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return XFS_ERROR(ENOSPC);
+       }
+       if (error = xfs_alloc_vextent(&args)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       if (args.fsbno == NULLFSBLOCK) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       cur->bc_private.b.firstblock = args.fsbno;
+       cur->bc_private.b.allocated++;
+       cur->bc_private.b.ip->i_d.di_nblocks++;
+       xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+       if (XFS_IS_QUOTA_ON(args.mp) &&
+           cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+           cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+                       XFS_TRANS_DQ_BCOUNT, 1L);
+       rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
+       right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, left, level, rbp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       INT_SET(right->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+       right->bb_level = left->bb_level; /* INT_: direct copy */
+       INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+       if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+           cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+               INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+       i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+       if (level > 0) {
+               lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
+               lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
+               rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+               rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)) {
+                               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                               return error;
+                       }
+               }
+#endif
+               bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+               xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
+       } else {
+               lrp = XFS_BMAP_REC_IADDR(left, i, cur);
+               rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+               bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
+       }
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+       right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+       INT_SET(left->bb_rightsib, ARCH_CONVERT, args.fsbno);
+       INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+       xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
+       xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+       if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+               if (error = xfs_btree_read_bufl(args.mp, args.tp,
+                               INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
+               if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.fsbno);
+               xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+       }
+       if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+               xfs_btree_setbuf(cur, level, rbp);
+               cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       if (level + 1 < cur->bc_nlevels) {
+               if (error = xfs_btree_dup_cursor(cur, curp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               (*curp)->bc_ptrs[level + 1]++;
+       }
+       *bnop = args.fsbno;
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Update keys for the record.
+ */
+STATIC int
+xfs_bmbt_updkey(
+       xfs_btree_cur_t         *cur,
+       xfs_bmbt_key_t          *keyp,  /* on-disk format */
+       int                     level)
+{
+       xfs_bmbt_block_t        *block;
+       xfs_buf_t                       *bp;
+#ifdef DEBUG
+       int                     error;
+#endif
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_updkey";
+#endif
+       xfs_bmbt_key_t          *kp;
+       int                     ptr;
+
+       ASSERT(level >= 1);
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
+       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+               block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               ptr = cur->bc_ptrs[level];
+               kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
+               *kp = *keyp;
+               xfs_bmbt_log_keys(cur, bp, ptr, ptr);
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       return 0;
+}
+
+/*
+ * Convert on-disk form of btree root to in-memory form.
+ * The in-memory header gets the bmap magic number, the level and record
+ * count of the on-disk root, and null sibling pointers; the keys and
+ * pointers in use are then copied across.
+ */
+void
+xfs_bmdr_to_bmbt(
+       xfs_bmdr_block_t        *dblock,
+       int                     dblocklen,
+       xfs_bmbt_block_t        *rblock,
+       int                     rblocklen)
+{
+       int                     maxrecs;        /* XFS_BTREE_BLOCK_MAXRECS for dblocklen */
+       int                     nrecs;          /* entries actually present */
+       xfs_bmbt_key_t          *src_keys;
+       xfs_bmbt_ptr_t          *src_ptrs;
+       xfs_bmbt_key_t          *dst_keys;
+       xfs_bmbt_ptr_t          *dst_ptrs;
+
+       /* Header of the in-memory root. */
+       INT_SET(rblock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+       rblock->bb_level = dblock->bb_level;    /* both in on-disk format */
+       ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+       rblock->bb_numrecs = dblock->bb_numrecs;/* both in on-disk format */
+       INT_SET(rblock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+       INT_SET(rblock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+
+       /* Locate the first key and pointer on both sides. */
+       maxrecs = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+       src_keys = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+       dst_keys = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+       src_ptrs = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, maxrecs);
+       dst_ptrs = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+
+       /* Copy only the entries that are in use. */
+       nrecs = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);
+       bcopy(src_keys, dst_keys, sizeof(*src_keys) * nrecs);
+       bcopy(src_ptrs, dst_ptrs, sizeof(*src_ptrs) * nrecs); /* INT_: direct copy */
+}
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * The cursor pointer at the level is decremented first.  If it is still
+ * greater than 0 the function is done (*stat = 1).  Otherwise, if the
+ * block at the level has no left sibling, *stat is 0; the pointer has
+ * already been decremented at that point and is not restored here.
+ * Otherwise the pointers of the levels above are decremented in turn
+ * until one stays greater than 0 (*stat = 0 if none does), and from
+ * there the path is rebuilt downwards: at each step the child block named
+ * by the current pointer is read, installed in the cursor, checked, and
+ * its cursor pointer set to the block's record count.
+ *
+ * Returns 0, or the error code of the helper call that failed.
+ */
+int                                            /* error */
+xfs_bmbt_decrement(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       xfs_bmbt_block_t        *block;
+       xfs_buf_t                       *bp;
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_decrement";
+#endif
+       xfs_fsblock_t           fsbno;
+       int                     lev;
+       xfs_mount_t             *mp;
+       xfs_trans_t             *tp;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, level);
+       ASSERT(level < cur->bc_nlevels);
+       if (level < cur->bc_nlevels - 1)
+               xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+       if (--cur->bc_ptrs[level] > 0) {        /* still inside this block */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {   /* no block to the left */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {   /* climb until a pointer stays > 0 */
+               if (--cur->bc_ptrs[lev] > 0)
+                       break;
+               if (lev < cur->bc_nlevels - 1)
+                       xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+       }
+       if (lev == cur->bc_nlevels) {   /* ran past the last cursor level */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       tp = cur->bc_tp;
+       mp = cur->bc_mp;
+       for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {        /* descend again */
+               fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);   /* last entry */
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ */
+int                                    /* error */
+xfs_bmbt_delete(
+       xfs_btree_cur_t *cur,
+       int             async,          /* deletion can be async */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char     fname[] = "xfs_bmbt_delete";
+#endif
+       int             i;
+       int             level;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       for (level = 0, i = 2; i == 2; level++) {
+               if (error = xfs_bmbt_delrec(cur, level, async, &i)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+       }
+       if (i == 0) {
+               for (level = 1; level < cur->bc_nlevels; level++) {
+                       if (cur->bc_ptrs[level] == 0) {
+                               if (error = xfs_bmbt_decrement(cur, level,
+                                               &i)) {
+                                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                                       return error;
+                               }
+                               break;
+                       }
+               }
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = i;
+       return 0;
+}
+
+/*
+ * Convert a compressed bmap extent record to an uncompressed form.
+ * This code must be in sync with the routines xfs_bmbt_get_startoff,
+ * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
+ *
+ * Fills in s->br_startoff, s->br_startblock, s->br_blockcount and
+ * s->br_state from the packed record r.  With BMBT_USE_64 the record is
+ * read through l0/l1 using 64-bit shifts and masks; otherwise through
+ * l0..l3 using 32-bit ones.  In both layouts the extent flag is l0
+ * shifted right so that only its top BMBT_EXNTFLAG_BITLEN bits remain,
+ * and the block count is the XFS_MASK..LO(21) part of the last word.
+ *
+ * Without XFS_BIG_FILESYSTEMS and without DEBUG, the 9 start block bits
+ * kept next to the start offset (in l0, or l1 in the 32-bit layout) are
+ * not read at all; the DEBUG variant reads them and asserts that the
+ * result fits in 32 bits or satisfies ISNULLDSTARTBLOCK.
+ *
+ * A nonzero extent flag is reported as XFS_EXT_UNWRITTEN, zero as
+ * XFS_EXT_NORM.
+ */
+void
+xfs_bmbt_get_all(
+       xfs_bmbt_rec_t  *r,
+       xfs_bmbt_irec_t *s)
+{
+       int     ext_flag;
+       xfs_exntst_t st;
+
+#if BMBT_USE_64
+       ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+       s->br_startoff = ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                          XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else  /* !XFS_BIG_FILES */
+       {
+               xfs_dfiloff_t   o;
+
+               o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                     XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+               ASSERT((o >> 32) == 0);
+               s->br_startoff = (xfs_fileoff_t)o;
+       }
+#endif /* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+       s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) | 
+                          (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+       {
+               xfs_dfsbno_t    b;
+
+               b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) | 
+                   (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+               ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+               s->br_startblock = (xfs_fsblock_t)b;
+       }
+#else  /* !DEBUG */
+       s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILESYSTEMS */
+       s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else  /* !BMBT_USE_64 */
+       ext_flag = (INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+       s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                           XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+                        (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else  /* !XFS_BIG_FILES */
+#ifdef DEBUG
+       {
+               xfs_dfiloff_t   o;
+
+               o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                      XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+                   (((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+               ASSERT((o >> 32) == 0);
+               s->br_startoff = (xfs_fileoff_t)o;
+       }
+#else  /* !DEBUG */
+       s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                           XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+                        (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+       s->br_startblock =
+               (((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+               (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+               (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+       {
+               xfs_dfsbno_t    b;
+
+               b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+                   (((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+                   (((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+               ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+               s->br_startblock = (xfs_fsblock_t)b;
+       }
+#else  /* !DEBUG */
+       s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+                          (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILESYSTEMS */
+       s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif /* BMBT_USE_64 */
+       /* This is xfs_extent_state() in-line */
+       if (ext_flag) {
+               ASSERT(s->br_blockcount != 0);  /* saved for DMIG */
+               st = XFS_EXT_UNWRITTEN;
+       } else
+               st = XFS_EXT_NORM;
+       s->br_state = st;
+}
+
+/*
+ * Return the btree block the cursor references at the given level.
+ * Levels below the top are held in buffers: *bpp is set to the
+ * cursor's buffer for that level.  The top level is the root kept in
+ * the inode fork (if_broot), which has no buffer, so *bpp is cleared.
+ */
+xfs_bmbt_block_t *
+xfs_bmbt_get_block(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       xfs_buf_t                       **bpp)
+{
+       xfs_ifork_t             *fork;
+
+       /* Top level: the root block comes from the inode fork. */
+       if (level >= cur->bc_nlevels - 1) {
+               *bpp = 0;
+               fork = XFS_IFORK_PTR(cur->bc_private.b.ip,
+                       cur->bc_private.b.whichfork);
+               return fork->if_broot;
+       }
+       /* Any other level is backed by a buffer held in the cursor. */
+       *bpp = cur->bc_bufs[level];
+       return XFS_BUF_TO_BMBT_BLOCK(*bpp);
+}
+
+/*
+ * Extract the blockcount field from a bmap extent record.
+ * The field is read from l1 when BMBT_USE_64 is set and from l3
+ * otherwise, masked with XFS_MASK64LO(21) or XFS_MASK32LO(21)
+ * respectively, and returned as an xfs_filblks_t.
+ */
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+       xfs_bmbt_rec_t  *r)
+{
+#if BMBT_USE_64
+       return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else  /* !BMBT_USE_64 */
+       return (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Extract the startblock field from a bmap extent record.
+ *
+ * With XFS_BIG_FILESYSTEMS the value is assembled from every record
+ * word that carries startblock bits.  Without it, DEBUG builds still
+ * assemble the full value as an xfs_dfsbno_t and assert that it
+ * either fits in 32 bits or satisfies ISNULLDSTARTBLOCK() before
+ * narrowing it; non-DEBUG builds leave out the term that would be
+ * shifted left by 43 and use only the remaining word(s).
+ */
+xfs_fsblock_t
+xfs_bmbt_get_startblock(
+       xfs_bmbt_rec_t  *r)
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+       return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+              (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+       xfs_dfsbno_t    b;      /* full-width value, checked before narrowing */
+
+       b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+           (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+       ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+       return (xfs_fsblock_t)b;
+#else  /* !DEBUG */
+       return (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILESYSTEMS */
+#else  /* !BMBT_USE_64 */
+#if XFS_BIG_FILESYSTEMS
+       return (((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+              (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+              (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+       xfs_dfsbno_t    b;      /* full-width value, checked before narrowing */
+
+       b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) | 
+           (((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+           (((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+       ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+       return (xfs_fsblock_t)b;
+#else  /* !DEBUG */
+       return (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+              (((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILESYSTEMS */
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Extract the startoff field from a bmap extent record.
+ *
+ * In the BMBT_USE_64 layout the offset is l0 masked with
+ * XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN) and shifted right by 9.
+ * In the other layout it is split across two words: the masked l0
+ * shifted left by 23, OR'ed with l1 shifted right by 9.
+ *
+ * Without XFS_BIG_FILES the value is asserted to fit in 32 bits
+ * before it is narrowed to xfs_fileoff_t.  The 64-bit layout always
+ * goes through the xfs_dfiloff_t temporary; the 32-bit layout only
+ * does so under DEBUG.
+ */
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+       xfs_bmbt_rec_t  *r)
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILES
+       return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else  /* !XFS_BIG_FILES */
+       xfs_dfiloff_t   o;      /* full-width offset, checked before narrowing */
+
+       o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+             XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+       ASSERT((o >> 32) == 0);
+       return (xfs_fileoff_t)o;
+#endif /* XFS_BIG_FILES */
+#else  /* !BMBT_USE_64 */
+#if XFS_BIG_FILES
+       return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                 XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+              (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else  /* !XFS_BIG_FILES */
+#ifdef DEBUG
+       xfs_dfiloff_t   o;      /* full-width offset, checked before narrowing */
+
+       o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+              XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+           (((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+       ASSERT((o >> 32) == 0);
+       return (xfs_fileoff_t)o;
+#else  /* !DEBUG */
+       return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+                 XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+              (((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif /* DEBUG */
+#endif /* XFS_BIG_FILES */
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Extract the extent state from a bmap extent record.
+ * The flag value is the first record word shifted right so that only
+ * its top BMBT_EXNTFLAG_BITLEN bits remain; xfs_extent_state() turns
+ * the blockcount and that flag into an xfs_exntst_t.
+ */
+xfs_exntst_t
+xfs_bmbt_get_state(
+       xfs_bmbt_rec_t  *r)
+{
+       xfs_filblks_t   blockcount;
+       int             flag;
+
+#if BMBT_USE_64
+       flag = (int)(INT_GET(r->l0, ARCH_CONVERT) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#else  /* !BMBT_USE_64 */
+       flag = (int)(INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#endif /* BMBT_USE_64 */
+       blockcount = xfs_bmbt_get_blockcount(r);
+       return xfs_extent_state(blockcount, flag);
+}
+
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * Returns 0 with *stat set to 1 when the cursor now points at a
+ * record, or 0 with *stat set to 0 when there is nothing to the right
+ * (the block has no right sibling, or no higher level could be
+ * advanced).  A nonzero return is the error from checking or reading
+ * a btree block; *stat is not written in that case.
+ */
+int                                            /* error */
+xfs_bmbt_increment(
+       xfs_btree_cur_t         *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       xfs_bmbt_block_t        *block;
+       xfs_buf_t                       *bp;
+       int                     error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_increment";
+#endif
+       xfs_fsblock_t           fsbno;
+       int                     lev;
+       xfs_mount_t             *mp;
+       xfs_trans_t             *tp;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGI(cur, level);
+       ASSERT(level < cur->bc_nlevels);
+       if (level < cur->bc_nlevels - 1)        /* the root level has no buffer to read ahead */
+               xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+       block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {        /* still inside this block */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 1;
+               return 0;
+       }
+       if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {  /* no block to the right */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {   /* climb until some level can advance */
+               block = xfs_bmbt_get_block(cur, lev, &bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+                       break;
+               if (lev < cur->bc_nlevels - 1)
+                       xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+       }
+       if (lev == cur->bc_nlevels) {   /* loop ran off the top without advancing */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       tp = cur->bc_tp;
+       mp = cur->bc_mp;
+       for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {        /* descend back to the requested level */
+               fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+                               XFS_BMAP_BTREE_REF)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+               if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {      /* checked in all builds, not only DEBUG */
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               cur->bc_ptrs[lev] = 1;  /* first entry of the newly read block */
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Insert the current record at the point referenced by cur.
+ */
+int                                    /* error */
+xfs_bmbt_insert(
+       xfs_btree_cur_t *cur,
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* error return value */
+#ifdef XFS_BMBT_TRACE
+       static char     fname[] = "xfs_bmbt_insert";
+#endif
+       int             i;
+       int             level;
+       xfs_fsblock_t   nbno;
+       xfs_btree_cur_t *ncur;
+       xfs_bmbt_rec_t  nrec;
+       xfs_btree_cur_t *pcur;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       level = 0;
+       nbno = NULLFSBLOCK;
+       xfs_bmbt_set_all(&nrec, &cur->bc_rec.b);
+       ncur = (xfs_btree_cur_t *)0;
+       pcur = cur;
+       do {
+               if (error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
+                               &i)) {
+                       if (pcur != cur)
+                               xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
+                       cur->bc_nlevels = pcur->bc_nlevels;
+                       cur->bc_private.b.allocated +=
+                               pcur->bc_private.b.allocated;
+                       pcur->bc_private.b.allocated = 0;
+                       ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
+                              (cur->bc_private.b.ip->i_d.di_flags & 
+                               XFS_DIFLAG_REALTIME));
+                       cur->bc_private.b.firstblock =
+                               pcur->bc_private.b.firstblock;
+                       ASSERT(cur->bc_private.b.flist ==
+                              pcur->bc_private.b.flist);
+                       xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+               }
+               if (ncur) {
+                       pcur = ncur;
+                       ncur = (xfs_btree_cur_t *)0;
+               }
+       } while (nbno != NULLFSBLOCK);
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *stat = i;
+       return 0;
+error0:
+       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+       return error;
+}
+
+/*
+ * Log fields from the btree block header.
+ *
+ * "fields" selects which header fields to log.  When the block has a
+ * buffer, xfs_btree_offsets() converts the selection, via the
+ * offsets[] table below, into the first and last byte to log in that
+ * buffer.  When bp is null the inode is logged instead with the
+ * XFS_ILOG_FBROOT flag for the cursor's fork.
+ */
+void
+xfs_bmbt_log_block(
+       xfs_btree_cur_t         *cur,
+       xfs_buf_t                       *bp,
+       int                     fields)
+{
+       int                     first;
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_log_block";
+#endif
+       int                     last;
+       xfs_trans_t             *tp;
+       static const short      offsets[] = {   /* byte offset of each header field, then the header size */
+               offsetof(xfs_bmbt_block_t, bb_magic),
+               offsetof(xfs_bmbt_block_t, bb_level),
+               offsetof(xfs_bmbt_block_t, bb_numrecs),
+               offsetof(xfs_bmbt_block_t, bb_leftsib),
+               offsetof(xfs_bmbt_block_t, bb_rightsib),
+               sizeof(xfs_bmbt_block_t)
+       };
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
+       tp = cur->bc_tp;
+       if (bp) {
+               xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
+                                 &last);
+               xfs_trans_log_buf(tp, bp, first, last);
+       } else
+               xfs_trans_log_inode(tp, cur->bc_private.b.ip,
+                       XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log record values from the btree block.
+ */
+void
+xfs_bmbt_log_recs(
+       xfs_btree_cur_t         *cur,
+       xfs_buf_t                       *bp,
+       int                     rfirst,
+       int                     rlast)
+{
+       xfs_bmbt_block_t        *block;
+       int                     first;
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_log_recs";
+#endif
+       int                     last;
+       xfs_bmbt_rec_t          *rp;
+       xfs_trans_t             *tp;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
+       ASSERT(bp);
+       tp = cur->bc_tp;
+       block = XFS_BUF_TO_BMBT_BLOCK(bp);
+       rp = XFS_BMAP_REC_DADDR(block, 1, cur);
+       first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
+       last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
+       xfs_trans_log_buf(tp, bp, first, last);
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Look up the record equal to (off, bno, len).
+ * The search values are stored in the cursor's record and the lookup
+ * is run with XFS_LOOKUP_EQ; its result is returned unchanged.
+ */
+int                                    /* error */
+xfs_bmbt_lookup_eq(
+       xfs_btree_cur_t *cur,
+       xfs_fileoff_t   off,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   len,
+       int             *stat)          /* success/failure */
+{
+       xfs_bmbt_irec_t *want = &cur->bc_rec.b;
+
+       want->br_startoff = off;
+       want->br_startblock = bno;
+       want->br_blockcount = len;
+       return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Look up the first record greater than or equal to (off, bno, len).
+ * The search values are stored in the cursor's record and the lookup
+ * is run with XFS_LOOKUP_GE; its result is returned unchanged.
+ */
+int                                    /* error */
+xfs_bmbt_lookup_ge(
+       xfs_btree_cur_t *cur,
+       xfs_fileoff_t   off,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   len,
+       int             *stat)          /* success/failure */
+{
+       xfs_bmbt_irec_t *want = &cur->bc_rec.b;
+
+       want->br_startoff = off;
+       want->br_startblock = bno;
+       want->br_blockcount = len;
+       return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Look up the last record less than or equal to (off, bno, len).
+ * The search values are stored in the cursor's record and the lookup
+ * is run with XFS_LOOKUP_LE; its result is returned unchanged.
+ */
+int                                    /* error */
+xfs_bmbt_lookup_le(
+       xfs_btree_cur_t *cur,
+       xfs_fileoff_t   off,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   len,
+       int             *stat)          /* success/failure */
+{
+       xfs_bmbt_irec_t *want = &cur->bc_rec.b;
+
+       want->br_startoff = off;
+       want->br_startblock = bno;
+       want->br_blockcount = len;
+       return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Give the bmap btree a new root block.  Copy the old broot contents
+ * down into a real block and make the broot point to it.
+ *
+ * One block is allocated with xfs_alloc_vextent().  If no block could
+ * be allocated (args.fsbno comes back as NULLFSBLOCK) the function
+ * returns 0 with *stat set to 0 and nothing changed.  Otherwise the
+ * root's header, keys and pointers are copied into the new block, the
+ * root is raised one level and left with a single entry pointing at
+ * the new block, the cursor gains a level, and *stat is set to 1.
+ * *logflags has XFS_ILOG_CORE and the fork's broot flag OR'ed in for
+ * the caller to log.
+ *
+ * NOTE(review): the buffer returned by xfs_btree_get_bufl() is used
+ * without a null check, and the DEBUG-only pointer checks after the
+ * allocation return without undoing the allocation accounting --
+ * confirm that this is acceptable to callers.
+ */
+int                                            /* error */
+xfs_bmbt_newroot(
+       xfs_btree_cur_t         *cur,           /* btree cursor */
+       int                     *logflags,      /* logging flags for inode */
+       int                     *stat)          /* return status - 0 fail */
+{
+       xfs_alloc_arg_t         args;           /* allocation arguments */
+       xfs_bmbt_block_t        *block;         /* bmap btree block */
+       xfs_buf_t                       *bp;            /* buffer for block */
+       xfs_bmbt_block_t        *cblock;        /* child btree block */
+       xfs_bmbt_key_t          *ckp;           /* child key pointer */
+       xfs_bmbt_ptr_t          *cpp;           /* child ptr pointer */
+       int                     error;          /* error return code */
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_newroot";
+#endif
+#ifdef DEBUG
+       int                     i;              /* loop counter */
+#endif
+       xfs_bmbt_key_t          *kp;            /* pointer to bmap btree key */
+       int                     level;          /* btree level */
+       xfs_bmbt_ptr_t          *pp;            /* pointer to bmap block addr */
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       level = cur->bc_nlevels - 1;    /* current root level */
+       block = xfs_bmbt_get_block(cur, level, &bp);
+       /*
+        * Copy the root into a real block.
+        * First set up the request for a single block.
+        */
+       args.mp = cur->bc_mp;
+       pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+       args.tp = cur->bc_tp;
+       args.fsbno = cur->bc_private.b.firstblock;
+       args.mod = args.minleft = args.alignment = args.total = args.isfl =
+               args.userdata = args.minalignslop = 0;
+       args.minlen = args.maxlen = args.prod = 1;
+       args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+       if (args.fsbno == NULLFSBLOCK) {        /* no firstblock yet: start from the root's first child */
+#ifdef DEBUG
+               if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+#endif
+               args.fsbno = INT_GET(*pp, ARCH_CONVERT);
+               args.type = XFS_ALLOCTYPE_START_BNO;
+       } else if (args.wasdel)
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
+       else
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       if (error = xfs_alloc_vextent(&args)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       if (args.fsbno == NULLFSBLOCK) {        /* nothing allocated: report failure through *stat */
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       cur->bc_private.b.firstblock = args.fsbno;
+       cur->bc_private.b.allocated++;
+       cur->bc_private.b.ip->i_d.di_nblocks++;
+       if (XFS_IS_QUOTA_ON(args.mp) &&
+           cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+           cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+               xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+                                         XFS_TRANS_DQ_BCOUNT, 1L);
+       bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
+       cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
+       *cblock = *block;       /* child starts as a copy of the root's header */
+       INT_MOD(block->bb_level, ARCH_CONVERT, +1);
+       INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+       cur->bc_nlevels++;
+       cur->bc_ptrs[level + 1] = 1;
+       kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+       ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+       bcopy(kp, ckp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+       cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+       for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+               if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) {
+                       XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                       return error;
+               }
+       }
+#endif
+       bcopy(pp, cpp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+#ifdef DEBUG
+       if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
+                       level)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       INT_SET(*pp, ARCH_CONVERT, args.fsbno); /* root's first pointer now names the new block */
+       xfs_iroot_realloc(cur->bc_private.b.ip, 1 - INT_GET(cblock->bb_numrecs, ARCH_CONVERT),
+               cur->bc_private.b.whichfork);
+       xfs_btree_setbuf(cur, level, bp);
+       /*
+        * Do all this logging at the end so that
+        * the root is at the right level.
+        */
+       xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
+       xfs_bmbt_log_keys(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+       xfs_bmbt_log_ptrs(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       *logflags |=
+               XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Set all the fields in a bmap extent record from the uncompressed form.
+ *
+ * s->br_state must be XFS_EXT_NORM or XFS_EXT_UNWRITTEN; it is stored
+ * as a single flag (0 for NORM, 1 otherwise) in the top bit of l0.
+ * The remaining fields are packed into l0/l1 (BMBT_USE_64) or l0..l3.
+ *
+ * Without XFS_BIG_FILESYSTEMS, a start block for which
+ * ISNULLSTARTBLOCK() is true additionally gets XFS_MASK..LO(9) OR'ed
+ * into the word that holds the start offset and XFS_MASK..HI(11)
+ * OR'ed into the following word; other start blocks leave those bits
+ * clear.
+ */
+void
+xfs_bmbt_set_all(
+       xfs_bmbt_rec_t  *r,
+       xfs_bmbt_irec_t *s)
+{
+       int     extent_flag;
+
+       ASSERT((s->br_state == XFS_EXT_NORM) ||
+               (s->br_state == XFS_EXT_UNWRITTEN));
+       extent_flag = (s->br_state == XFS_EXT_NORM) ? 0 : 1;
+#if XFS_BIG_FILES
+       ASSERT((s->br_startoff & XFS_MASK64HI(9)) == 0);
+       ASSERT((s->br_blockcount & XFS_MASK64HI(43)) == 0);
+#else  /* !XFS_BIG_FILES */
+       ASSERT((s->br_blockcount & XFS_MASK32HI(11)) == 0);
+#endif /* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+       ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
+#endif /* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) | 
+                 ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+                 ((xfs_bmbt_rec_base_t)s->br_startblock >> 43));
+       INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | 
+                 ((xfs_bmbt_rec_base_t)s->br_blockcount &
+                  (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(s->br_startblock)) {       /* also fill the upper startblock bits with ones */
+               INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+                         (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+               INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+                         ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+                         ((xfs_bmbt_rec_base_t)s->br_blockcount &
+                          (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       } else {
+               INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)s->br_startoff << 9));
+               INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | 
+                         ((xfs_bmbt_rec_base_t)s->br_blockcount &
+                          (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+#else  /* !BMBT_USE_64 */
+       INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+               ((xfs_bmbt_rec_base_t)(s->br_startoff >> 23)));
+       INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startblock) << 21) |
+                 ((xfs_bmbt_rec_base_t)(s->br_blockcount & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startoff) << 9) |
+                 ((xfs_bmbt_rec_base_t)(s->br_startblock >> 43)));
+       INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(s->br_startblock)) {       /* also fill the upper startblock bits with ones */
+               INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9) |
+                         (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+               INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+                         (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+       } else {
+               INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9));
+               INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ *
+ * o is the start offset, b the start block, c the block count and v
+ * the extent state (XFS_EXT_NORM or XFS_EXT_UNWRITTEN, asserted).
+ * The state is stored as a single flag (0 for NORM, 1 otherwise) in
+ * the top bit of l0; the other fields are packed into l0/l1
+ * (BMBT_USE_64) or l0..l3.
+ *
+ * Without XFS_BIG_FILESYSTEMS, a start block for which
+ * ISNULLSTARTBLOCK() is true additionally gets XFS_MASK..LO(9) OR'ed
+ * into the word that holds the start offset and XFS_MASK..HI(11)
+ * OR'ed into the following word.
+ */
+void
+xfs_bmbt_set_allf(
+       xfs_bmbt_rec_t  *r,
+       xfs_fileoff_t   o,
+       xfs_fsblock_t   b,
+       xfs_filblks_t   c,
+       xfs_exntst_t    v)
+{
+       int     extent_flag;
+
+       ASSERT((v == XFS_EXT_NORM) || (v == XFS_EXT_UNWRITTEN));
+       extent_flag = (v == XFS_EXT_NORM) ? 0 : 1;
+#if XFS_BIG_FILES
+       ASSERT((o & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0);
+       ASSERT((c & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+#else  /* !XFS_BIG_FILES */
+       ASSERT((c & XFS_MASK32HI(11)) == 0);
+#endif /* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+       ASSERT((b & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+#endif /* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) | 
+               ((xfs_bmbt_rec_base_t)o << 9) |
+               ((xfs_bmbt_rec_base_t)b >> 43));
+       INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) | 
+                 ((xfs_bmbt_rec_base_t)c &
+                  (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(b)) {      /* also fill the upper startblock bits with ones */
+               INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)o << 9) |
+                        (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+               INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+                         ((xfs_bmbt_rec_base_t)b << 21) |
+                         ((xfs_bmbt_rec_base_t)c &
+                          (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       } else {
+               INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)o << 9));
+               INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) | 
+                         ((xfs_bmbt_rec_base_t)c &
+                          (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+#else  /* !BMBT_USE_64 */
+       INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+               ((xfs_bmbt_rec_base_t)(o >> 23)));
+       INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)b) << 21) |
+                 ((xfs_bmbt_rec_base_t)(c & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)o) << 9) |
+                 ((xfs_bmbt_rec_base_t)(b >> 43)));
+       INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(b)) {      /* also fill the upper startblock bits with ones */
+               INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9) |
+                         (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+               INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+                         (xfs_bmbt_rec_base_t)(b >> 11));
+       } else {
+               INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9));
+               INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Set the blockcount field in a bmap extent record.
+ * Only the word holding the count is rewritten (l1 with BMBT_USE_64,
+ * l3 otherwise): the bits selected by XFS_MASK64HI(43) or
+ * XFS_MASK32HI(11) are kept from the existing word and v, masked
+ * with XFS_MASK..LO(21), is OR'ed in.  The asserts check that v has
+ * no bits set outside the range the field can hold.
+ */
+void
+xfs_bmbt_set_blockcount(
+       xfs_bmbt_rec_t  *r,
+       xfs_filblks_t   v)
+{
+#if XFS_BIG_FILES
+       ASSERT((v & XFS_MASK64HI(43)) == 0);
+#else  /* !XFS_BIG_FILES */
+       ASSERT((v & XFS_MASK32HI(11)) == 0);
+#endif
+#if BMBT_USE_64
+       INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
+                 (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)));
+#else  /* !BMBT_USE_64 */
+       INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32HI(11)) |
+                 ((xfs_bmbt_rec_base_t)v & XFS_MASK32LO(21)));
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Set the startblock field in a bmap extent record.
+ *
+ * The start block shares record words with the start offset and the
+ * block count, so each affected word is read, the neighbouring
+ * field's bits are preserved with a mask, and the new start block
+ * bits are OR'ed in.
+ *
+ * Without XFS_BIG_FILESYSTEMS, a value for which ISNULLSTARTBLOCK()
+ * is true sets the XFS_MASK..LO(9) bits next to the start offset and
+ * ORs XFS_MASK..HI(11) into the following word; any other value
+ * clears the XFS_MASK..LO(9) bits.
+ */
+void
+xfs_bmbt_set_startblock(
+       xfs_bmbt_rec_t  *r,
+       xfs_fsblock_t   v)
+{
+#if XFS_BIG_FILESYSTEMS
+       ASSERT((v & XFS_MASK64HI(12)) == 0);
+#endif /* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
+                 (xfs_bmbt_rec_base_t)(v >> 43));
+       INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
+                 (xfs_bmbt_rec_base_t)(v << 21));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(v)) {
+               INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) | (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+               INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
+                         ((xfs_bmbt_rec_base_t)v << 21) |
+                         (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       } else {
+               INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+               INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 21) |
+                         (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+#else  /* !BMBT_USE_64 */
+#if XFS_BIG_FILESYSTEMS
+       INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32HI(23)) | (xfs_bmbt_rec_base_t)(v >> 43));
+       INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+#else  /* !XFS_BIG_FILESYSTEMS */
+       if (ISNULLSTARTBLOCK(v)) {
+               INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) | XFS_MASK32LO(9)));
+               INT_SET(r->l2, ARCH_CONVERT, XFS_MASK32HI(11) | (xfs_bmbt_rec_base_t)(v >> 11));
+       } else {
+               INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & ~XFS_MASK32LO(9)));
+               INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+       }
+#endif /* XFS_BIG_FILESYSTEMS */
+       INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21)) |        /* l3 is updated in both configurations */
+                 (((xfs_bmbt_rec_base_t)v) << 21));
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Set the startoff field in a bmap extent record.
+ * The bits of l0 selected by XFS_MASK..HI(1) are preserved, as are
+ * the XFS_MASK..LO(9) bits of the word that also receives v << 9
+ * (l0 with BMBT_USE_64, l1 otherwise); everything between is
+ * replaced by the new offset.
+ */
+void
+xfs_bmbt_set_startoff(
+       xfs_bmbt_rec_t  *r,
+       xfs_fileoff_t   v)
+{
+#if XFS_BIG_FILES
+       ASSERT((v & XFS_MASK64HI(9)) == 0);
+#endif /* XFS_BIG_FILES */
+#if BMBT_USE_64
+       INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
+               ((xfs_bmbt_rec_base_t)v << 9) |
+                 (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+#else  /* !BMBT_USE_64 */
+       INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK32HI(1)) |
+               (xfs_bmbt_rec_base_t)(v >> 23));
+       INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 9) |
+                 (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32LO(9)));
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Set the extent state field in a bmap extent record.
+ * v must be XFS_EXT_NORM or XFS_EXT_UNWRITTEN (asserted).  For NORM,
+ * l0 is AND'ed with XFS_MASK..LO(wordsize - BMBT_EXNTFLAG_BITLEN);
+ * otherwise XFS_MASK..HI(BMBT_EXNTFLAG_BITLEN) is OR'ed into l0.
+ * Each arm of the if/else below is a single statement chosen by the
+ * preprocessor, which is why neither arm has braces.
+ */
+void
+xfs_bmbt_set_state(
+       xfs_bmbt_rec_t  *r,
+       xfs_exntst_t    v)
+{
+       ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
+       if (v == XFS_EXT_NORM)
+#if BMBT_USE_64
+               INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN));
+#else  /* !BMBT_USE_64 */
+               INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN));
+#endif /* BMBT_USE_64 */
+       else
+#if BMBT_USE_64
+               INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN));
+#else  /* !BMBT_USE_64 */
+               INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK32HI(BMBT_EXNTFLAG_BITLEN));
+#endif /* BMBT_USE_64 */
+}
+
+/*
+ * Convert in-memory form of btree root to on-disk form.
+ *
+ * rblock/rblocklen describe the in-memory root, dblock/dblocklen the
+ * on-disk root being filled in.  The level and record count are
+ * copied as-is, then bb_numrecs keys and pointers are copied from the
+ * in-memory positions to the positions computed for a block of
+ * dblocklen bytes.  The asserts require the in-memory root to carry
+ * the bmap magic number, to have no siblings and to be above level 0.
+ */
+void
+xfs_bmbt_to_bmdr(
+       xfs_bmbt_block_t        *rblock,
+       int                     rblocklen,
+       xfs_bmdr_block_t        *dblock,
+       int                     dblocklen)
+{
+       int                     dmxr;   /* first the on-disk max records, later the record count */
+       xfs_bmbt_key_t          *fkp;   /* source (in-memory) keys */
+       xfs_bmbt_ptr_t          *fpp;   /* source (in-memory) pointers */
+       xfs_bmbt_key_t          *tkp;   /* destination (on-disk) keys */
+       xfs_bmbt_ptr_t          *tpp;   /* destination (on-disk) pointers */
+
+       ASSERT(INT_GET(rblock->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC);
+       ASSERT(INT_GET(rblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+       ASSERT(INT_GET(rblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+       ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+       dblock->bb_level = rblock->bb_level;    /* both in on-disk format */
+       dblock->bb_numrecs = rblock->bb_numrecs;/* both in on-disk format */
+       dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+       fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+       tkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+       fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+       tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+       dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);       /* reused: number of entries to copy */
+       bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
+       bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+}
+
+/*
+ * Update the record to the passed values.
+ */
+int
+xfs_bmbt_update(
+       xfs_btree_cur_t         *cur,
+       xfs_fileoff_t           off,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       xfs_exntst_t            state)
+{
+       xfs_bmbt_block_t        *block;
+       xfs_buf_t                       *bp;
+       int                     error;
+#ifdef XFS_BMBT_TRACE
+       static char             fname[] = "xfs_bmbt_update";
+#endif
+       xfs_bmbt_key_t          key;
+       int                     ptr;
+       xfs_bmbt_rec_t          *rp;
+
+       XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+       XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
+               (xfs_dfilblks_t)len, (int)state);
+       block = xfs_bmbt_get_block(cur, 0, &bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_lblock(cur, block, 0, bp)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+#endif
+       ptr = cur->bc_ptrs[0];
+       rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
+       xfs_bmbt_set_allf(rp, off, bno, len, state);
+       xfs_bmbt_log_recs(cur, bp, ptr, ptr);
+       if (ptr > 1) {
+               XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+               return 0;
+       }
+       INT_SET(key.br_startoff, ARCH_CONVERT, off);
+       if (error = xfs_bmbt_updkey(cur, &key, 1)) {
+               XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+               return error;
+       }
+       XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+       return 0;
+}
+
+/*
+ * Scan an extent list that has just been read for records with any
+ * bit set in the extent flag field.  That is not expected to happen,
+ * so debug kernels ASSERT when one is found.
+ * Returns 1 as soon as a record with a flag bit set is seen,
+ * otherwise 0.
+ */
+int
+xfs_check_nostate_extents(
+       xfs_bmbt_rec_t          *ep,
+       xfs_extnum_t            num)
+{
+       /* The flag field is the top BMBT_EXNTFLAG_BITLEN bits of l0. */
+#if BMBT_USE_64
+       const int               flag_shift = 64 - BMBT_EXNTFLAG_BITLEN;
+#else  /* !BMBT_USE_64 */
+       const int               flag_shift = 32 - BMBT_EXNTFLAG_BITLEN;
+#endif /* BMBT_USE_64 */
+
+       while (num > 0) {
+               if ((INT_GET(ep->l0, ARCH_CONVERT) >> flag_shift) != 0) {
+                       ASSERT(0);
+                       return 1;
+               }
+               ep++;
+               num--;
+       }
+       return 0;
+}
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
new file mode 100644 (file)
index 0000000..73cdd9c
--- /dev/null
@@ -0,0 +1,889 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This file contains common code for the space manager's btree implementations.
+ */
+
+#include <xfs.h>
+
+/*
+ * Cursor allocation zone.
+ * xfs_btree_init_cursor() asserts that this is non-NULL and allocates
+ * zeroed cursors from it; xfs_btree_del_cursor() frees them back to it.
+ */
+xfs_zone_t     *xfs_btree_cur_zone;
+
+/*
+ * Btree magic numbers.
+ * Indexed by the btree number (cur->bc_btnum) in the block checking
+ * routines.  The entry order presumably follows the xfs_btnum_t values
+ * (BNO, CNT, BMAP, INO) -- confirm against the enum definition.
+ */
+const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
+{
+       XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
+};
+
+/* 
+ * Prototypes for internal routines.
+ */
+
+/*
+ * Checking routine: return maxrecs for the block.
+ * Forward declaration only, so the file-local helper can be used by
+ * the block checking routines regardless of definition order.
+ */
+STATIC int                             /* number of records fitting in block */
+xfs_btree_maxrecs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_block_t       *block);/* generic btree block pointer */
+
+/*
+ * Internal routines.
+ */
+
+/*
+ * Checking routine: look up the record capacity of a btree block.
+ * The answer is selected by the btree type held in the cursor and by
+ * the level stored in the block header.  An unknown btree type trips
+ * an ASSERT and yields 0.
+ */
+STATIC int                             /* number of records fitting in block */
+xfs_btree_maxrecs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_block_t       *block) /* generic btree block pointer */
+{
+       int                     lev;    /* level read from the block header */
+       int                     maxrecs;/* value to return */
+
+       lev = INT_GET(block->bb_h.bb_level, ARCH_CONVERT);
+       switch (cur->bc_btnum) {
+       case XFS_BTNUM_BNO:
+       case XFS_BTNUM_CNT:
+               maxrecs = (int)XFS_ALLOC_BLOCK_MAXRECS(lev, cur);
+               break;
+       case XFS_BTNUM_BMAP:
+               maxrecs = (int)XFS_BMAP_BLOCK_IMAXRECS(lev, cur);
+               break;
+       case XFS_BTNUM_INO:
+               maxrecs = (int)XFS_INOBT_BLOCK_MAXRECS(lev, cur);
+               break;
+       default:
+               ASSERT(0);
+               maxrecs = 0;
+               break;
+       }
+       return maxrecs;
+}
+
+/*
+ * External routines.
+ */
+
+#ifdef DEBUG
+/*
+ * Debug routine: validate a block header.
+ * Dispatches to the long form or short form checker depending on the
+ * pointer size used by the cursor's btree type.  The checker's return
+ * value is not examined here.
+ */
+void
+xfs_btree_check_block(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_block_t       *block, /* generic btree block pointer */
+       int                     level,  /* level of the btree block */
+       xfs_buf_t               *bp)    /* buffer containing block, if any */
+{
+       if (!XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+               xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block,
+                       level, bp);
+               return;
+       }
+       xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, bp);
+}
+
+/*
+ * Debug routine: assert that two keys are correctly ordered, i.e. that
+ * the left key sorts strictly before the right key under the ordering
+ * used by the given btree type.
+ */
+void
+xfs_btree_check_key(
+       xfs_btnum_t     btnum,          /* btree identifier */
+       void            *ak1,           /* pointer to left (lower) key */
+       void            *ak2)           /* pointer to right (higher) key */
+{
+       switch (btnum) {
+       case XFS_BTNUM_BNO: {
+               xfs_alloc_key_t *k1 = ak1;
+               xfs_alloc_key_t *k2 = ak2;
+
+               /* Ordered by starting block. */
+               ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) <
+                      INT_GET(k2->ar_startblock, ARCH_CONVERT));
+               break;
+       }
+       case XFS_BTNUM_CNT: {
+               xfs_alloc_key_t *k1 = ak1;
+               xfs_alloc_key_t *k2 = ak2;
+
+               /* Ordered by block count, ties broken by starting block. */
+               ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) <
+                      INT_GET(k2->ar_blockcount, ARCH_CONVERT) ||
+                      (INT_GET(k1->ar_blockcount, ARCH_CONVERT) ==
+                       INT_GET(k2->ar_blockcount, ARCH_CONVERT) &&
+                       INT_GET(k1->ar_startblock, ARCH_CONVERT) <
+                       INT_GET(k2->ar_startblock, ARCH_CONVERT)));
+               break;
+       }
+       case XFS_BTNUM_BMAP: {
+               xfs_bmbt_key_t  *k1 = ak1;
+               xfs_bmbt_key_t  *k2 = ak2;
+
+               /* Ordered by starting file offset. */
+               ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) <
+                      INT_GET(k2->br_startoff, ARCH_CONVERT));
+               break;
+       }
+       case XFS_BTNUM_INO: {
+               xfs_inobt_key_t *k1 = ak1;
+               xfs_inobt_key_t *k2 = ak2;
+
+               /* Ordered by starting inode number. */
+               ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) <
+                      INT_GET(k2->ir_startino, ARCH_CONVERT));
+               break;
+       }
+       default:
+               ASSERT(0);
+               break;
+       }
+}
+#endif /* DEBUG */
+
+/*
+ * Checking routine: check that long form block header is ok.
+ * The header passes when the magic number matches the cursor's btree
+ * type, the stored level equals "level", numrecs does not exceed
+ * xfs_btree_maxrecs(), and each sibling pointer is nonzero and either
+ * NULLDFSBNO or accepted by XFS_FSB_SANITY_CHECK().
+ * The result is passed through XFS_TEST_ERROR() together with an error
+ * tag, so the failure path can presumably also be forced by error
+ * injection -- confirm against the macro definition.
+ * On failure the buffer, if one was supplied, is traced and
+ * EFSCORRUPTED is returned.
+ */
+/* ARGSUSED */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_lblock_t      *block, /* btree long form block pointer */
+       int                     level,  /* level of the btree block */
+       xfs_buf_t               *bp)    /* buffer for block, if any */
+{
+       int                     lblock_ok; /* block passes checks */
+       xfs_mount_t             *mp;    /* file system mount point */
+
+       mp = cur->bc_mp;
+       lblock_ok =
+               INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
+               INT_GET(block->bb_level, ARCH_CONVERT) == level &&
+               INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+                       xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
+               INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 && /* 0 is never a valid sibling */
+               (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO ||
+                XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) &&
+               INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0 && /* same rule for the right side */
+               (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO ||
+                XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT)));
+       if (XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
+                       XFS_RANDOM_BTREE_CHECK_LBLOCK)) {
+#pragma mips_frequency_hint NEVER
+               if (bp) /* bp may be absent, see the "if any" parameter note */
+                       xfs_buftrace("LBTREE ERROR", bp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       return 0;
+}
+
+/*
+ * Checking routine: validate a long form btree pointer.
+ * The pointer is accepted when the level is greater than zero, the
+ * pointer is not NULLDFSBNO and it passes XFS_FSB_SANITY_CHECK() for
+ * the cursor's mount; any other case is handled by
+ * XFS_WANT_CORRUPTED_RETURN().
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_dfsbno_t    ptr,            /* btree block disk address */
+       int             level)          /* btree block level */
+{
+       XFS_WANT_CORRUPTED_RETURN(
+               level > 0 &&
+               ptr != NULLDFSBNO &&
+               XFS_FSB_SANITY_CHECK(cur->bc_mp, ptr));
+       return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Debug routine: assert that two adjacent records are correctly
+ * ordered for the given btree type.  For the by-block, bmap and inode
+ * btrees this means the left record ends at or before the start of
+ * the right record; for the by-count btree it means strict ordering
+ * on (blockcount, startblock).
+ */
+void
+xfs_btree_check_rec(
+       xfs_btnum_t     btnum,          /* btree identifier */
+       void            *ar1,           /* pointer to left (lower) record */
+       void            *ar2)           /* pointer to right (higher) record */
+{
+       switch (btnum) {
+       case XFS_BTNUM_BNO: {
+               xfs_alloc_rec_t *r1 = ar1;
+               xfs_alloc_rec_t *r2 = ar2;
+
+               /* Free extents must not overlap. */
+               ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) +
+                      INT_GET(r1->ar_blockcount, ARCH_CONVERT) <=
+                      INT_GET(r2->ar_startblock, ARCH_CONVERT));
+               break;
+       }
+       case XFS_BTNUM_CNT: {
+               xfs_alloc_rec_t *r1 = ar1;
+               xfs_alloc_rec_t *r2 = ar2;
+
+               /* Sorted by length, ties broken by starting block. */
+               ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) <
+                      INT_GET(r2->ar_blockcount, ARCH_CONVERT) ||
+                      (INT_GET(r1->ar_blockcount, ARCH_CONVERT) ==
+                       INT_GET(r2->ar_blockcount, ARCH_CONVERT) &&
+                       INT_GET(r1->ar_startblock, ARCH_CONVERT) <
+                       INT_GET(r2->ar_startblock, ARCH_CONVERT)));
+               break;
+       }
+       case XFS_BTNUM_BMAP: {
+               xfs_bmbt_rec_t  *r1 = ar1;
+               xfs_bmbt_rec_t  *r2 = ar2;
+
+               /* File extents must not overlap. */
+               ASSERT(xfs_bmbt_get_startoff(r1) +
+                      xfs_bmbt_get_blockcount(r1) <=
+                      xfs_bmbt_get_startoff(r2));
+               break;
+       }
+       case XFS_BTNUM_INO: {
+               xfs_inobt_rec_t *r1 = ar1;
+               xfs_inobt_rec_t *r2 = ar2;
+
+               /* Each record spans XFS_INODES_PER_CHUNK inodes. */
+               ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) +
+                      XFS_INODES_PER_CHUNK <=
+                      INT_GET(r2->ir_startino, ARCH_CONVERT));
+               break;
+       }
+       default:
+               ASSERT(0);
+               break;
+       }
+}
+#endif /* DEBUG */
+
+/*
+ * Checking routine: check that block header is ok.
+ * Short form variant.  The header passes when the magic number matches
+ * the cursor's btree type, the stored level equals "level", numrecs
+ * does not exceed xfs_btree_maxrecs(), and each sibling pointer is
+ * nonzero and either NULLAGBLOCK or below the allocation group length
+ * read from the AG header buffer held in the cursor.
+ * On failure the buffer, if non-NULL, is traced and EFSCORRUPTED is
+ * returned.
+ * NOTE(review): the AG header is always fetched through
+ * bc_private.a.agbp and interpreted as an AGF, although
+ * xfs_btree_init_cursor() stores the buffer through bc_private.i for
+ * inode btrees.  This presumably relies on the union members and the
+ * AGF/AGI length fields lining up -- confirm.
+ */
+/* ARGSUSED */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_btree_sblock_t      *block, /* btree short form block pointer */
+       int                     level,  /* level of the btree block */
+       xfs_buf_t               *bp)    /* buffer containing block */
+{
+       xfs_buf_t               *agbp;  /* buffer for ag. freespace struct */
+       xfs_agf_t               *agf;   /* ag. freespace structure */
+       xfs_agblock_t           agflen; /* native ag. freespace length */
+       int                     sblock_ok; /* block passes checks */
+
+       agbp = cur->bc_private.a.agbp;
+       agf = XFS_BUF_TO_AGF(agbp);
+       agflen = INT_GET(agf->agf_length, ARCH_CONVERT); /* upper bound for sibling block numbers */
+       sblock_ok =
+               INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
+               INT_GET(block->bb_level, ARCH_CONVERT) == level &&
+               INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
+                       xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
+               (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK ||
+                INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) &&
+               INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 && /* 0 is never a valid sibling */
+               (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK ||
+                INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) &&
+               INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0;
+       if (XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
+                       XFS_ERRTAG_BTREE_CHECK_SBLOCK,
+                       XFS_RANDOM_BTREE_CHECK_SBLOCK)) {
+#pragma mips_frequency_hint NEVER
+               if (bp) /* tolerate callers that pass no buffer */
+                       xfs_buftrace("SBTREE ERROR", bp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       return 0;
+}
+
+/*
+ * Checking routine: validate a short form btree pointer.
+ * The pointer is accepted when the level is greater than zero and the
+ * pointer is neither NULLAGBLOCK nor 0 and lies below the allocation
+ * group length taken from the AG header buffer in the cursor; any
+ * other case is handled by XFS_WANT_CORRUPTED_RETURN().
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agblock_t   ptr,            /* btree block disk address */
+       int             level)          /* btree block level */
+{
+       xfs_agf_t       *agf;           /* ag. freespace structure */
+
+       agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+       XFS_WANT_CORRUPTED_RETURN(
+               level > 0 &&
+               ptr != NULLAGBLOCK && ptr != 0 &&
+               ptr < INT_GET(agf->agf_length, ARCH_CONVERT));
+       return 0;
+}
+
+/*
+ * Delete the btree cursor: release every buffer it still holds and
+ * return the cursor to the cursor zone.
+ */
+void
+xfs_btree_del_cursor(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       int             error)          /* del because of error */
+{
+       int             lev;            /* btree level */
+
+       /*
+        * Drop the buffers level by level.  Normally the first empty
+        * slot ends the scan.  After an error every slot has to be
+        * looked at: some btree code fills bc_bufs from level n down
+        * to 0, so a failure part way through can leave the low
+        * levels unset while higher ones still hold buffers.
+        */
+       for (lev = 0; lev < cur->bc_nlevels; lev++) {
+               if (cur->bc_bufs[lev] != NULL) {
+                       xfs_btree_setbuf(cur, lev, NULL);
+                       continue;
+               }
+               if (!error)
+                       break;
+       }
+       /*
+        * A bmap cursor may only be freed once the accounting for its
+        * allocated indirect blocks has been dealt with.
+        */
+       ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+              cur->bc_private.b.allocated == 0);
+       /*
+        * Give the cursor back to its zone.
+        */
+       kmem_zone_free(xfs_btree_cur_zone, cur);
+}
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ */
+int                                    /* error */
+xfs_btree_dup_cursor(
+       xfs_btree_cur_t *cur,           /* input cursor */
+       xfs_btree_cur_t **ncur)         /* output cursor */
+{
+       xfs_buf_t       *bp;            /* btree block's buffer pointer */
+       int             error;          /* error return value */
+       int             i;              /* level number of btree block */
+       xfs_mount_t     *mp;            /* mount structure for filesystem */
+       xfs_btree_cur_t *new;           /* new cursor value */
+       xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
+
+       tp = cur->bc_tp;
+       mp = cur->bc_mp;
+       /*
+        * Allocate a new cursor like the old one.
+        */
+       new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
+               cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
+               cur->bc_private.b.whichfork);
+       /*
+        * Copy the record currently in the cursor.
+        */
+       new->bc_rec = cur->bc_rec;
+       /*
+        * For each level current, re-get the buffer and copy the ptr value.
+        */
+       for (i = 0; i < new->bc_nlevels; i++) {
+               new->bc_ptrs[i] = cur->bc_ptrs[i];
+               new->bc_ra[i] = cur->bc_ra[i];
+               if (bp = cur->bc_bufs[i]) {
+                       if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                               XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp)) {
+#pragma mips_frequency_hint NEVER
+                               xfs_btree_del_cursor(new, error);
+                               *ncur = NULL;
+                               return error;
+                       }
+                       new->bc_bufs[i] = bp;
+                       ASSERT(bp);
+                       ASSERT(!XFS_BUF_GETERROR(bp));
+               } else
+                       new->bc_bufs[i] = NULL;
+       }
+       /*
+        * For bmap btrees, copy the firstblock, flist, and flags values,
+        * since init cursor doesn't get them.
+        */
+       if (new->bc_btnum == XFS_BTNUM_BMAP) {
+               new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
+               new->bc_private.b.flist = cur->bc_private.b.flist;
+               new->bc_private.b.flags = cur->bc_private.b.flags;
+       }
+       *ncur = new;
+       return 0;
+}
+
+/*
+ * Point the cursor at the first record of the block at the given
+ * level.  The cursor's other levels are left alone.
+ */
+int                                    /* success=1, failure=0 */
+xfs_btree_firstrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to change */
+{
+       xfs_btree_block_t       *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+
+       /*
+        * Fetch and check the block the cursor has at this level.
+        */
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       /*
+        * A non-empty block has its first record/key at ptr value 1.
+        */
+       if (INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT) != 0) {
+               cur->bc_ptrs[level] = 1;
+               return 1;
+       }
+       /*
+        * Empty block: there is no record to point at.
+        */
+       return 0;
+}
+
+/* 
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be a bmap btree root or from a buffer.
+ */
+xfs_btree_block_t *                    /* generic btree block pointer */
+xfs_btree_get_block(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree */
+       xfs_buf_t               **bpp)  /* buffer containing the block */
+{
+       xfs_btree_block_t       *block; /* return value */
+       xfs_buf_t               *bp;    /* return buffer */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+
+       if (cur->bc_btnum != XFS_BTNUM_BMAP || level != cur->bc_nlevels - 1) {
+               /* Ordinary case: the block lives in the cursor's buffer. */
+               bp = cur->bc_bufs[level];
+               block = XFS_BUF_TO_BLOCK(bp);
+       } else {
+               /*
+                * Top level of a bmap btree: the block is the root kept
+                * in the inode fork, so no buffer is involved.
+                */
+               ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+                       cur->bc_private.b.whichfork);
+               block = (xfs_btree_block_t *)ifp->if_broot;
+               bp = NULL;
+       }
+       ASSERT(block != NULL);
+       *bpp = bp;
+       return block;
+}
+
+/*
+ * Obtain a buffer for a block named by filesystem block number
+ * (long-form addressing).  The buffer is returned without its
+ * contents having been read.
+ */
+xfs_buf_t *                            /* buffer for fsbno */
+xfs_btree_get_bufl(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_fsblock_t   fsbno,          /* file system block number */
+       uint            lock)           /* lock flags for get_buf */
+{
+       xfs_buf_t       *bp;            /* buffer pointer (return value) */
+
+       ASSERT(fsbno != NULLFSBLOCK);
+       /* Translate the block number to a disk address for the lookup. */
+       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+               XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, lock);
+       ASSERT(bp);
+       ASSERT(!XFS_BUF_GETERROR(bp));
+       return bp;
+}
+
+/*
+ * Obtain a buffer for a block named by allocation group number and
+ * block number within the group (short-form addressing).  The buffer
+ * is returned without its contents having been read.
+ */
+xfs_buf_t *                            /* buffer for agno/agbno */
+xfs_btree_get_bufs(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_agblock_t   agbno,          /* allocation group block number */
+       uint            lock)           /* lock flags for get_buf */
+{
+       xfs_buf_t       *bp;            /* buffer pointer (return value) */
+
+       ASSERT(agno != NULLAGNUMBER);
+       ASSERT(agbno != NULLAGBLOCK);
+       /* Translate the AG-relative address to a disk address. */
+       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+               XFS_AGB_TO_DADDR(mp, agno, agbno), mp->m_bsize, lock);
+       ASSERT(bp);
+       ASSERT(!XFS_BUF_GETERROR(bp));
+       return bp;
+}
+
+/*
+ * Allocate a new btree cursor.
+ * The cursor is either for allocation (A) or bmap (B) or inodes (I).
+ * It is taken zeroed from xfs_btree_cur_zone; the common fields and
+ * the private fields belonging to the btree type are then filled in.
+ * The number of levels comes from the AGF (A), from the root block in
+ * the inode fork (B) or from the AGI (I).
+ * An unknown btree type trips an ASSERT; when assertions are compiled
+ * out the cursor is returned with zero levels rather than with an
+ * indeterminate level count.
+ */
+xfs_btree_cur_t *                      /* new btree cursor */
+xfs_btree_init_cursor(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *agbp,          /* (A only) buffer for agf structure */
+                                       /* (I only) buffer for agi structure */
+       xfs_agnumber_t  agno,           /* (AI only) allocation group number */
+       xfs_btnum_t     btnum,          /* btree identifier */
+       xfs_inode_t     *ip,            /* (B only) inode owning the btree */
+       int             whichfork)      /* (B only) data or attr fork */
+{
+       xfs_agf_t       *agf;           /* (A) allocation group freespace */
+       xfs_agi_t       *agi;           /* (I) allocation group inodespace */
+       xfs_btree_cur_t *cur;           /* return value */
+       xfs_ifork_t     *ifp;           /* (B) inode fork pointer */
+       int             nlevels;        /* number of levels in the btree */
+
+       ASSERT(xfs_btree_cur_zone != NULL);
+       /*
+        * Allocate a new cursor.
+        */
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+       /*
+        * Deduce the number of btree levels from the arguments.
+        */
+       switch (btnum) {
+       case XFS_BTNUM_BNO:
+       case XFS_BTNUM_CNT:
+               agf = XFS_BUF_TO_AGF(agbp);
+               nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT);
+               break;
+       case XFS_BTNUM_BMAP:
+               ifp = XFS_IFORK_PTR(ip, whichfork);
+               /* The root's level counts from 0, hence the + 1. */
+               nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
+               break;
+       case XFS_BTNUM_INO:
+               agi = XFS_BUF_TO_AGI(agbp);
+               nlevels = INT_GET(agi->agi_level, ARCH_CONVERT);
+               break;
+       default:
+               ASSERT(0);
+               /*
+                * Without this, nlevels would be read uninitialized
+                * below whenever ASSERT is compiled out.
+                */
+               nlevels = 0;
+               break;
+       }
+       /*
+        * Fill in the common fields.
+        */
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+       cur->bc_nlevels = nlevels;
+       cur->bc_btnum = btnum;
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+       /*
+        * Fill in private fields.
+        */
+       switch (btnum) {
+       case XFS_BTNUM_BNO:
+       case XFS_BTNUM_CNT:
+               /*
+                * Allocation btree fields.
+                */
+               cur->bc_private.a.agbp = agbp;
+               cur->bc_private.a.agno = agno;
+               break;
+       case XFS_BTNUM_BMAP:
+               /*
+                * Bmap btree fields.
+                */
+               cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
+               cur->bc_private.b.ip = ip;
+               cur->bc_private.b.firstblock = NULLFSBLOCK;
+               cur->bc_private.b.flist = NULL;
+               cur->bc_private.b.allocated = 0;
+               cur->bc_private.b.flags = 0;
+               cur->bc_private.b.whichfork = whichfork;
+               break;
+       case XFS_BTNUM_INO:
+               /*
+                * Inode allocation btree fields.
+                */
+               cur->bc_private.i.agbp = agbp;
+               cur->bc_private.i.agno = agno;
+               break;
+       default:
+               ASSERT(0);
+               break;
+       }
+       return cur;
+}
+
+/*
+ * Report whether the block the cursor has at the given level is the
+ * last one on that level, i.e. whether its right sibling pointer is
+ * the null value for the btree's pointer size.
+ */
+int                                    /* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to check */
+{
+       xfs_btree_block_t       *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+       int                     islast; /* value to return */
+
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       if (!XFS_BTREE_LONG_PTRS(cur->bc_btnum))
+               islast = INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) ==
+                               NULLAGBLOCK;
+       else
+               islast = INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) ==
+                               NULLDFSBNO;
+       return islast;
+}
+
+/*
+ * Point the cursor at the last record of the current block at the
+ * given level.  The cursor's other levels are left alone.
+ */
+int                                    /* success=1, failure=0 */
+xfs_btree_lastrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to change */
+{
+       xfs_btree_block_t       *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+       int                     numrecs;/* record count from the header */
+
+       /*
+        * Fetch and check the block the cursor has at this level.
+        */
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       numrecs = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT);
+       /*
+        * Empty block: there is no record to point at.
+        */
+       if (numrecs == 0)
+               return 0;
+       /*
+        * The last record/key sits at ptr value numrecs.
+        */
+       cur->bc_ptrs[level] = numrecs;
+       return 1;
+}
+
+/*
+ * Compute first and last byte offsets for the fields given.
+ * Interprets the offsets table, which contains struct field offsets:
+ * offsets[i] is the first byte of the field for bit i, and
+ * offsets[i + 1] - 1 is used as its last byte, so the table is read
+ * at indices 0 through nbits.
+ *
+ * Only bits 0 .. nbits-1 of "fields" are examined.  Both scans are
+ * bounded by nbits, so a mask with no bit in that range (a caller
+ * bug, caught by ASSERT in debug builds) can neither index outside
+ * the table nor loop forever; in that case *first and *last are left
+ * untouched.
+ */
+void
+xfs_btree_offsets(
+       __int64_t       fields,         /* bitmask of fields */
+       const short     *offsets,       /* table of field offsets */
+       int             nbits,          /* number of bits to inspect */
+       int             *first,         /* output: first byte offset */
+       int             *last)          /* output: last byte offset */
+{
+       unsigned long long      bits;   /* fields as an unsigned mask */
+       int                     hi;     /* highest set bit number */
+       int                     lo;     /* lowest set bit number */
+
+       ASSERT(fields != 0);
+       /*
+        * Work on an unsigned copy so that shifting up to bit 63 is
+        * well defined, and never look past the width of the mask.
+        */
+       bits = (unsigned long long)fields;
+       if (nbits > 64)
+               nbits = 64;
+       /*
+        * Find the lowest bit, so the first byte offset.
+        */
+       for (lo = 0; lo < nbits; lo++) {
+               if (bits & (1ULL << lo))
+                       break;
+       }
+       ASSERT(lo < nbits);
+       if (lo >= nbits)
+               return;
+       *first = offsets[lo];
+       /*
+        * Find the highest bit, so the last byte offset.  Bit "lo" is
+        * known to be set, so this scan stops there at the latest.
+        */
+       for (hi = nbits - 1; hi > lo; hi--) {
+               if (bits & (1ULL << hi))
+                       break;
+       }
+       *last = offsets[hi + 1] - 1;
+}
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Long-form addressing.
+ */
+int                                    /* error */
+xfs_btree_read_bufl(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_fsblock_t   fsbno,          /* file system block number */
+       uint            lock,           /* lock flags for read_buf */
+       xfs_buf_t       **bpp,          /* buffer for fsbno */
+       int             refval)         /* ref count value for buffer */
+{
+       xfs_buf_t       *bp;            /* return value */
+       xfs_daddr_t             d;              /* real disk block address */
+       int             error;
+
+       ASSERT(fsbno != NULLFSBLOCK);
+       d = XFS_FSB_TO_DADDR(mp, fsbno);
+       if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+                       mp->m_bsize, lock, &bp)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+       if (bp != NULL) {
+               XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
+       }
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Short-form addressing.
+ */
+int                                    /* error */
+xfs_btree_read_bufs(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_agblock_t   agbno,          /* allocation group block number */
+       uint            lock,           /* lock flags for read_buf */
+       xfs_buf_t       **bpp,          /* buffer for agno/agbno */
+       int             refval)         /* ref count value for buffer */
+{
+       xfs_buf_t       *bp;            /* return value */
+       xfs_daddr_t             d;              /* real disk block address */
+       int             error;
+
+       ASSERT(agno != NULLAGNUMBER);
+       ASSERT(agbno != NULLAGBLOCK);
+       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+       if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+                                       mp->m_bsize, lock, &bp)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+       if (bp != NULL)
+               switch (refval) {
+               case XFS_ALLOC_BTREE_REF:
+                       XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
+                       break;
+               case XFS_INO_BTREE_REF:
+                       XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
+                       break;
+               }
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Start read-ahead of the sibling blocks of the block the cursor has
+ * at the given level.
+ * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA and select the
+ * directions wanted.  Directions already recorded in bc_ra for this
+ * level are not repeated.  Returns the number of read-aheads issued.
+ */
+int
+xfs_btree_readahead(
+       xfs_btree_cur_t         *cur,           /* btree cursor */
+       int                     lev,            /* level in btree */
+       int                     lr)             /* left/right bits */
+{
+       xfs_alloc_block_t       *a;
+       xfs_bmbt_block_t        *b;
+       xfs_inobt_block_t       *i;
+       int                     issued;         /* read-aheads started */
+       int                     want_left;      /* left sibling requested */
+       int                     want_right;     /* right sibling requested */
+
+       ASSERT(cur->bc_bufs[lev] != NULL);
+       /*
+        * Nothing to do when every requested bit is already recorded.
+        */
+       if ((lr & ~cur->bc_ra[lev]) == 0)
+               return 0;
+       cur->bc_ra[lev] |= lr;
+       want_left = (lr & XFS_BTCUR_LEFTRA) != 0;
+       want_right = (lr & XFS_BTCUR_RIGHTRA) != 0;
+       issued = 0;
+       switch (cur->bc_btnum) {
+       case XFS_BTNUM_BNO:
+       case XFS_BTNUM_CNT:
+               a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
+               if (want_left &&
+                   INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+                               INT_GET(a->bb_leftsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               if (want_right &&
+                   INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+                               INT_GET(a->bb_rightsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               break;
+       case XFS_BTNUM_BMAP:
+               b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
+               if (want_left &&
+                   INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) {
+                       xfs_btree_reada_bufl(cur->bc_mp,
+                               INT_GET(b->bb_leftsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               if (want_right &&
+                   INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+                       xfs_btree_reada_bufl(cur->bc_mp,
+                               INT_GET(b->bb_rightsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               break;
+       case XFS_BTNUM_INO:
+               i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
+               if (want_left &&
+                   INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+                               INT_GET(i->bb_leftsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               if (want_right &&
+                   INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
+                               INT_GET(i->bb_rightsib, ARCH_CONVERT), 1);
+                       issued++;
+               }
+               break;
+       default:
+               ASSERT(0);
+               break;
+       }
+       return issued;
+}
+
+/*
+ * Install bp as the buffer for level "lev" in the cursor, releasing
+ * whatever buffer was there before.
+ * The level's read-ahead state is reset; for a non-NULL buffer the
+ * read-ahead bit of each direction whose sibling pointer is null is
+ * set up front, so xfs_btree_readahead() sees that direction as
+ * already handled.
+ */
+void
+xfs_btree_setbuf(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     lev,    /* level in btree */
+       xfs_buf_t               *bp)    /* new buffer to set */
+{
+       xfs_btree_block_t       *b;     /* btree block */
+       xfs_buf_t               *obp;   /* old buffer pointer */
+       int                     ra;     /* read-ahead bits to preset */
+
+       obp = cur->bc_bufs[lev];
+       if (obp != NULL)
+               xfs_trans_brelse(cur->bc_tp, obp);
+       cur->bc_bufs[lev] = bp;
+       if (bp == NULL) {
+               cur->bc_ra[lev] = 0;
+               return;
+       }
+       b = XFS_BUF_TO_BLOCK(bp);
+       ra = 0;
+       if (!XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+               if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK)
+                       ra |= XFS_BTCUR_LEFTRA;
+               if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK)
+                       ra |= XFS_BTCUR_RIGHTRA;
+       } else {
+               if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO)
+                       ra |= XFS_BTCUR_LEFTRA;
+               if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO)
+                       ra |= XFS_BTCUR_RIGHTRA;
+       }
+       cur->bc_ra[lev] = ra;
+}
diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c
new file mode 100644 (file)
index 0000000..37ad626
--- /dev/null
@@ -0,0 +1,2524 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_da_btree.c
+ *
+ * Routines to implement directories as Btrees of hashed names.
+ */
+
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Create the initial contents of an intermediate node.
+ *
+ * Gets a buffer for block "blkno" of fork "whichfork", fills in the
+ * node header (zero sibling links, node magic, zero pad, zero entries,
+ * the requested level), logs the header and returns the buffer through
+ * *bpp.  Returns 0, or the error from xfs_da_get_buf(), in which case
+ * *bpp is not written.
+ */
+int
+xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
+                                xfs_dabuf_t **bpp, int whichfork)
+{
+       xfs_da_intnode_t *newnode;
+       xfs_dabuf_t *nodebuf;
+       int err;
+
+       err = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &nodebuf,
+                            whichfork);
+       if (err)
+               return err;
+       ASSERT(nodebuf != NULL);
+       newnode = nodebuf->data;
+
+       /* Fresh header: no siblings and no entries yet. */
+       INT_ZERO(newnode->hdr.info.forw, ARCH_CONVERT);
+       INT_ZERO(newnode->hdr.info.back, ARCH_CONVERT);
+       INT_SET(newnode->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
+       INT_ZERO(newnode->hdr.info.pad, ARCH_CONVERT);
+       INT_ZERO(newnode->hdr.count, ARCH_CONVERT);
+       INT_SET(newnode->hdr.level, ARCH_CONVERT, level);
+
+       xfs_da_log_buf(args->trans, nodebuf,
+               XFS_DA_LOGRANGE(newnode, &newnode->hdr, sizeof(newnode->hdr)));
+
+       *bpp = nodebuf;
+       return 0;
+}
+
+/*
+ * Split a leaf node, rebalance, then possibly split
+ * intermediate nodes, rebalance, etc.
+ *
+ * The walk starts at the deepest active block of state->path (asserted
+ * to be an attr or dir leaf) and moves toward blk[0], one level per
+ * iteration.  "addblk" is the block produced by the level below that
+ * still has to be inserted into the current level; the walk ends as
+ * soon as a node level takes it without splitting (addblk == NULL).
+ * If addblk is still set once blk[0] has been processed, the root is
+ * split with xfs_da_root_split() and the sibling links that must point
+ * at the relocated old root are rewritten.
+ *
+ * Returns 0 on success, otherwise the error of the failing callee.
+ * When built without __KERNEL__, an attribute leaf yields ENOTTY.
+ */
+int                                                    /* error */
+xfs_da_split(xfs_da_state_t *state)
+{
+       xfs_da_state_blk_t *oldblk, *newblk, *addblk;
+       xfs_da_intnode_t *node;
+       xfs_dabuf_t *bp;
+       int max, action, error, i;
+
+       /*
+        * Walk back up the tree splitting/inserting/adjusting as necessary.
+        * If we need to insert and there isn't room, split the node, then
+        * decide which fragment to insert the new block from below into.
+        * Note that we may split the root this way, but we need more fixup.
+        */
+       max = state->path.active - 1;
+       ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
+       ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
+              state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+
+       addblk = &state->path.blk[max];         /* initial dummy value */
+       /*
+        * path.active is decremented in step with i, so it reaches 0 only
+        * when every level, including blk[0], has been processed.
+        */
+       for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
+               oldblk = &state->path.blk[i];
+               newblk = &state->altpath.blk[i];
+
+               /*
+                * If a leaf node then
+                *     Allocate a new leaf node, then rebalance across them.
+                * else if an intermediate node then
+                *     We split on the last layer, must we split the node?
+                */
+               /*
+                * NOTE(review): the switch has no default case; a block
+                * with any other magic leaves addblk unchanged.
+                */
+               switch (oldblk->magic) {
+               case XFS_ATTR_LEAF_MAGIC:
+#ifndef __KERNEL__
+                       /* Attribute leaves cannot be split in this build. */
+                       return(ENOTTY);
+#else
+                       /* ENOSPC is not fatal: it triggers a second split. */
+                       error = xfs_attr_leaf_split(state, oldblk, newblk);
+                       if ((error != 0) && (error != ENOSPC)) {
+                               return(error);  /* GROT: attr is inconsistent */
+                       }
+                       if (!error) {
+                               addblk = newblk;
+                               break;
+                       }
+                       /*
+                        * Entry wouldn't fit, split the leaf again.
+                        */
+                       state->extravalid = 1;
+                       if (state->inleaf) {
+                               state->extraafter = 0;  /* before newblk */
+                               error = xfs_attr_leaf_split(state, oldblk,
+                                                           &state->extrablk);
+                       } else {
+                               state->extraafter = 1;  /* after newblk */
+                               error = xfs_attr_leaf_split(state, newblk,
+                                                           &state->extrablk);
+                       }
+                       if (error)
+                               return(error);  /* GROT: attr inconsistent */
+                       addblk = newblk;
+                       break;
+#endif
+               case XFS_DIR_LEAF_MAGIC:
+                       ASSERT(XFS_DIR_IS_V1(state->mp));
+                       /* ENOSPC is not fatal: it triggers a second split. */
+                       error = xfs_dir_leaf_split(state, oldblk, newblk);
+                       if ((error != 0) && (error != ENOSPC)) {
+                               return(error);  /* GROT: dir is inconsistent */
+                       }
+                       if (!error) {
+                               addblk = newblk;
+                               break;
+                       }
+                       /*
+                        * Entry wouldn't fit, split the leaf again.
+                        */
+                       state->extravalid = 1;
+                       if (state->inleaf) {
+                               state->extraafter = 0;  /* before newblk */
+                               error = xfs_dir_leaf_split(state, oldblk,
+                                                          &state->extrablk);
+                               if (error)
+                                       return(error);  /* GROT: dir incon. */
+                               addblk = newblk;
+                       } else {
+                               state->extraafter = 1;  /* after newblk */
+                               error = xfs_dir_leaf_split(state, newblk,
+                                                          &state->extrablk);
+                               if (error)
+                                       return(error);  /* GROT: dir incon. */
+                               addblk = newblk;
+                       }
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+                       ASSERT(XFS_DIR_IS_V2(state->mp));
+                       error = xfs_dir2_leafn_split(state, oldblk, newblk);
+                       if (error)
+                               return error;
+                       addblk = newblk;
+                       break;
+               case XFS_DA_NODE_MAGIC:
+                       /*
+                        * Insert addblk into this node; "max - i" is handed
+                        * down as the level for a node created by the split.
+                        * addblk's buffer is released before the error check,
+                        * so it is dropped on failure as well.
+                        */
+                       error = xfs_da_node_split(state, oldblk, newblk, addblk,
+                                                        max - i, &action);
+                       xfs_da_buf_done(addblk->bp);
+                       addblk->bp = NULL;
+                       if (error)
+                               return(error);  /* GROT: dir is inconsistent */
+                       /*
+                        * Record the newly split block for the next time thru?
+                        */
+                       if (action)
+                               addblk = newblk;
+                       else
+                               addblk = NULL;
+                       break;
+               }
+
+               /*
+                * Update the btree to show the new hashval for this child.
+                */
+               xfs_da_fixhashpath(state, &state->path);
+               /*
+                * If we won't need this block again, it's getting dropped
+                * from the active path by the loop control, so we need
+                * to mark it done now.
+                */
+               if (i > 0 || !addblk)
+                       xfs_da_buf_done(oldblk->bp);
+       }
+       /* Nothing left to insert: some level absorbed the new block. */
+       if (!addblk)
+               return(0);
+
+       /*
+        * Split the root node.
+        */
+       ASSERT(state->path.active == 0);
+       oldblk = &state->path.blk[0];
+       error = xfs_da_root_split(state, oldblk, addblk);
+       if (error) {
+               /* Release both buffers before reporting the failure. */
+               xfs_da_buf_done(oldblk->bp);
+               xfs_da_buf_done(addblk->bp);
+               addblk->bp = NULL;
+               return(error);  /* GROT: dir is inconsistent */
+       }
+
+       /*
+        * Update pointers to the node which used to be block 0 and
+        * just got bumped because of the addition of a new root node.
+        * There might be three blocks involved if a double split occurred,
+        * and the original block 0 could be at any position in the list.
+        */
+
+       /*
+        * Forward neighbour of the relocated block: either addblk or,
+        * failing that, the extra block.  Make its back link point at
+        * oldblk's new block number and log the change.
+        */
+       node = oldblk->bp->data;
+       if (!INT_ISZERO(node->hdr.info.forw, ARCH_CONVERT)) {
+               if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
+                       bp = addblk->bp;
+               } else {
+                       ASSERT(state->extravalid);
+                       bp = state->extrablk.bp;
+               }
+               node = bp->data;
+               INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
+               xfs_da_log_buf(state->args->trans, bp,
+                   XFS_DA_LOGRANGE(node, &node->hdr.info,
+                   sizeof(node->hdr.info)));
+       }
+       /* Same fix-up for the backward neighbour's forward link. */
+       node = oldblk->bp->data;
+       if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
+               if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
+                       bp = addblk->bp;
+               } else {
+                       ASSERT(state->extravalid);
+                       bp = state->extrablk.bp;
+               }
+               node = bp->data;
+               INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
+               xfs_da_log_buf(state->args->trans, bp,
+                   XFS_DA_LOGRANGE(node, &node->hdr.info,
+                   sizeof(node->hdr.info)));
+       }
+       xfs_da_buf_done(oldblk->bp);
+       xfs_da_buf_done(addblk->bp);
+       addblk->bp = NULL;
+       return(0);
+}
+
+/*
+ * Split the root.  We have to create a new root and point to the two
+ * parts (the split old root) that we just created.  Copy block zero to
+ * the EOF, extending the inode in process.
+ *
+ * state: split state; supplies the args (inode, transaction, fork)
+ *        and the mount.
+ * blk1:  the current root block.  On success its bp and blkno are
+ *        switched to the newly allocated copy.
+ * blk2:  the block split off from the old root; it becomes the second
+ *        entry of the new root.
+ *
+ * Returns 0, or the error from xfs_da_grow_inode(), xfs_da_get_buf()
+ * or xfs_da_node_create().
+ */
+STATIC int                                             /* error */
+xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+                                xfs_da_state_blk_t *blk2)
+{
+       xfs_da_intnode_t *node, *oldroot;
+       xfs_da_args_t *args;
+       xfs_dablk_t blkno;
+       xfs_dabuf_t *bp;
+       int error, size;
+       xfs_inode_t *dp;
+       xfs_trans_t *tp;
+       xfs_mount_t *mp;
+       xfs_dir2_leaf_t *leaf;
+
+       /*
+        * Copy the existing (incorrect) block from the root node position
+        * to a free space somewhere.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error)
+               return(error);
+       dp = args->dp;
+       tp = args->trans;
+       mp = state->mp;
+       error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
+       if (error)
+               return(error);
+       ASSERT(bp != NULL);
+       node = bp->data;
+       oldroot = blk1->bp->data;
+       /*
+        * Only the bytes in use are copied: from the start of the block up
+        * to the end of the last entry, for either a node or a V2 leaf.
+        */
+       if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+               size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
+                            (char *)oldroot);
+       } else {
+               ASSERT(XFS_DIR_IS_V2(mp));
+               ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+               leaf = (xfs_dir2_leaf_t *)oldroot;
+               size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
+                            (char *)leaf);
+       }
+       bcopy(oldroot, node, size);
+       xfs_da_log_buf(tp, bp, 0, size - 1);
+       /* blk1 now describes the copy rather than the root position. */
+       xfs_da_buf_done(blk1->bp);
+       blk1->bp = bp;
+       blk1->blkno = blkno;
+
+       /*
+        * Set up the new root node.
+        * It is created at m_dirleafblk for the data fork of a V2
+        * directory and at block 0 otherwise, one level above the copy.
+        */
+       error = xfs_da_node_create(args,
+               args->whichfork == XFS_DATA_FORK &&
+               XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
+               INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
+       if (error)
+               return(error);
+       node = bp->data;
+       INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
+       INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
+       INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
+       INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
+       INT_SET(node->hdr.count, ARCH_CONVERT, 2);
+       /*
+        * The leaf-range check is guarded by the same condition that
+        * selected the root block number above (and that xfs_da_node_add
+        * uses for the same check): data fork of a V2 directory only.
+        */
+       if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
+               ASSERT(blk1->blkno >= mp->m_dirleafblk &&
+                      blk1->blkno < mp->m_dirfreeblk);
+               ASSERT(blk2->blkno >= mp->m_dirleafblk &&
+                      blk2->blkno < mp->m_dirfreeblk);
+       }
+       /* Header is already logged by xfs_da_node_create */
+       xfs_da_log_buf(tp, bp,
+               XFS_DA_LOGRANGE(node, node->btree,
+                       sizeof(xfs_da_node_entry_t) * 2));
+       xfs_da_buf_done(bp);
+
+       return(0);
+}
+
+/*
+ * Split the node, rebalance, then add the new entry.
+ *
+ * state:     split state (args, mount, extra-block bookkeeping).
+ * oldblk:    the node that must receive the new entry.
+ * newblk:    filled in with the newly created node when a split occurs.
+ * addblk:    the block from the level below to insert.
+ * treelevel: level given to a node created by the split.
+ * result:    set to 1 when a new node was created, 0 otherwise.
+ *
+ * Returns 0, or the error from xfs_da_grow_inode(),
+ * xfs_da_node_create() or xfs_da_blk_link().
+ */
+STATIC int                                             /* error */
+xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+                                xfs_da_state_blk_t *newblk,
+                                xfs_da_state_blk_t *addblk,
+                                int treelevel, int *result)
+{
+       xfs_da_intnode_t *node;
+       xfs_dablk_t blkno;
+       int newcount, error;
+       int useextra;
+
+       node = oldblk->bp->data;
+       ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+
+       /*
+        * With V2 the extra block is data or freespace.
+        */
+       /* So the extra block is only inserted here for V1 directories. */
+       useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
+       newcount = 1 + useextra;
+       /*
+        * Do we have to split the node?
+        */
+       if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > XFS_DA_NODE_ENTRIES(state->mp)) {
+               /*
+                * Allocate a new node, add to the doubly linked chain of
+                * nodes, then move some of our excess entries into it.
+                */
+               error = xfs_da_grow_inode(state->args, &blkno);
+               if (error)
+                       return(error);  /* GROT: dir is inconsistent */
+               
+               error = xfs_da_node_create(state->args, blkno, treelevel,
+                                          &newblk->bp, state->args->whichfork);
+               if (error)
+                       return(error);  /* GROT: dir is inconsistent */
+               newblk->blkno = blkno;
+               newblk->magic = XFS_DA_NODE_MAGIC;
+               /* Rebalance may move the insertion index over to newblk. */
+               xfs_da_node_rebalance(state, oldblk, newblk);
+               error = xfs_da_blk_link(state, oldblk, newblk);
+               if (error)
+                       return(error);
+               *result = 1;
+       } else {
+               *result = 0;
+       }
+
+       /*
+        * Insert the new entry(s) into the correct block
+        * (updating last hashval in the process).
+        *
+        * xfs_da_node_add() inserts BEFORE the given index,
+        * and as a result of using node_lookup_int() we always
+        * point to a valid entry (not after one), but a split
+        * operation always results in a new block whose hashvals
+        * FOLLOW the current block.
+        *
+        * If we had double-split op below us, then add the extra block too.
+        */
+       /*
+        * oldblk->index beyond the entry count means the rebalance above
+        * redirected the insertion to newblk (it sets the index to
+        * count + 1 for that purpose).
+        */
+       node = oldblk->bp->data;
+       if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
+               oldblk->index++;
+               xfs_da_node_add(state, oldblk, addblk);
+               if (useextra) {
+                       /* Step past addblk when the extra block follows it. */
+                       if (state->extraafter)
+                               oldblk->index++;
+                       xfs_da_node_add(state, oldblk, &state->extrablk);
+                       state->extravalid = 0;
+               }
+       } else {
+               newblk->index++;
+               xfs_da_node_add(state, newblk, addblk);
+               if (useextra) {
+                       /* Step past addblk when the extra block follows it. */
+                       if (state->extraafter)
+                               newblk->index++;
+                       xfs_da_node_add(state, newblk, &state->extrablk);
+                       state->extravalid = 0;
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * Balance the btree elements between two intermediate nodes,
+ * usually one full and one empty.
+ *
+ * NOTE: if blk2 is empty, then it will get the upper half of blk1.
+ *
+ * Half of the difference in entry counts is moved from the fuller node
+ * to the other one.  Afterwards blk1->hashval and blk2->hashval hold
+ * the last hashval of their respective nodes, and if blk1->index now
+ * falls beyond blk1's entries it is translated into blk2->index while
+ * blk1->index is set to an out-of-range value.
+ */
+STATIC void
+xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+                                    xfs_da_state_blk_t *blk2)
+{
+       xfs_da_intnode_t *node1, *node2, *tmpnode;
+       xfs_da_node_entry_t *btree_s, *btree_d;
+       int count, tmp;
+       xfs_trans_t *tp;
+
+       node1 = blk1->bp->data;
+       node2 = blk2->bp->data;
+       /*
+        * Figure out how many entries need to move, and in which direction.
+        * Swap the nodes around if that makes it simpler.
+        */
+       /*
+        * The swap happens only when both nodes are non-empty and node2's
+        * first or last hashval is below node1's, so that node1 ends up
+        * as the node with the lower hashvals.
+        */
+       if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
+           ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
+            (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
+             INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
+               tmpnode = node1;
+               node1 = node2;
+               node2 = tmpnode;
+       }
+       ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       /* count > 0: node1 gives entries to node2; count < 0: the reverse. */
+       count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
+       if (count == 0)
+               return;
+       tp = state->args->trans;
+       /*
+        * Two cases: high-to-low and low-to-high.
+        */
+       if (count > 0) {
+               /*
+                * Move elements in node2 up to make a hole.
+                */
+               /* Source and destination overlap, hence ovbcopy(). */
+               if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
+                       tmp *= (uint)sizeof(xfs_da_node_entry_t);
+                       btree_s = &node2->btree[0];
+                       btree_d = &node2->btree[count];
+                       ovbcopy(btree_s, btree_d, tmp);
+               }
+
+               /*
+                * Move the req'd B-tree elements from high in node1 to
+                * low in node2.
+                */
+               INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
+               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
+               btree_d = &node2->btree[0];
+               bcopy(btree_s, btree_d, tmp);
+               /* node1 gives up its tail simply by lowering its count. */
+               INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
+
+       } else {
+               /*
+                * Move the req'd B-tree elements from low in node2 to
+                * high in node1.
+                */
+               count = -count;
+               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &node2->btree[0];
+               btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
+               bcopy(btree_s, btree_d, tmp);
+               INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
+               /* The entries appended to node1 are logged right away. */
+               xfs_da_log_buf(tp, blk1->bp,
+                       XFS_DA_LOGRANGE(node1, btree_d, tmp));
+
+               /*
+                * Move elements in node2 down to fill the hole.
+                */
+               tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
+               tmp *= (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &node2->btree[count];
+               btree_d = &node2->btree[0];
+               ovbcopy(btree_s, btree_d, tmp);
+               INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
+       }
+
+       /*
+        * Log header of node 1 and all current bits of node 2.
+        */
+       /*
+        * NOTE(review): node1/node2 may have been swapped above, yet they
+        * are still paired with blk1->bp/blk2->bp here and in the logging
+        * call of the low-to-high branch -- confirm the logged buffers and
+        * ranges are the intended ones in the swapped case.
+        */
+       xfs_da_log_buf(tp, blk1->bp,
+               XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
+       xfs_da_log_buf(tp, blk2->bp,
+               XFS_DA_LOGRANGE(node2, &node2->hdr,
+                       sizeof(node2->hdr) +
+                       sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
+
+       /*
+        * Record the last hashval from each block for upward propagation.
+        * (note: don't use the swapped node pointers)
+        */
+       node1 = blk1->bp->data;
+       node2 = blk2->bp->data;
+       blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+       blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+       /*
+        * Adjust the expected index for insertion.
+        */
+       /* An index at or past blk1's count now refers to a slot in blk2. */
+       if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
+               blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
+               blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;      /* make it invalid */
+       }
+}
+
+/*
+ * Add a new entry to an intermediate node.
+ *
+ * An entry carrying newblk's hashval and block number is written at
+ * position oldblk->index of the node held by oldblk->bp; entries at or
+ * after that position are first shifted up by one.  The touched
+ * entries and the node header are logged, and oldblk->hashval is
+ * refreshed from the node's last entry.
+ */
+STATIC void
+xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+                              xfs_da_state_blk_t *newblk)
+{
+       xfs_da_intnode_t *node;
+       xfs_da_node_entry_t *slot;
+       xfs_mount_t *mp;
+       int nentries;
+       int shiftbytes;
+
+       mp = state->mp;
+       node = oldblk->bp->data;
+       nentries = INT_GET(node->hdr.count, ARCH_CONVERT);
+       ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       ASSERT((oldblk->index >= 0) && (oldblk->index <= nentries));
+       ASSERT(newblk->blkno != 0);
+       if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
+               ASSERT(newblk->blkno >= mp->m_dirleafblk &&
+                      newblk->blkno < mp->m_dirfreeblk);
+       }
+
+       /*
+        * Open a gap at the insertion point unless we append at the end.
+        */
+       slot = &node->btree[ oldblk->index ];
+       shiftbytes = 0;
+       if (oldblk->index < nentries) {
+               shiftbytes = (nentries - oldblk->index) * (uint)sizeof(*slot);
+               ovbcopy(slot, slot + 1, shiftbytes);
+       }
+
+       /* Fill the slot, then log it together with everything shifted. */
+       INT_SET(slot->hashval, ARCH_CONVERT, newblk->hashval);
+       INT_SET(slot->before, ARCH_CONVERT, newblk->blkno);
+       xfs_da_log_buf(state->args->trans, oldblk->bp,
+               XFS_DA_LOGRANGE(node, slot, shiftbytes + sizeof(*slot)));
+
+       /* Account for the new entry and log the updated header. */
+       INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
+       xfs_da_log_buf(state->args->trans, oldblk->bp,
+               XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
+
+       /*
+        * Propagate the node's last hash value upwards through oldblk.
+        */
+       oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Deallocate an empty leaf node, remove it from its parent,
+ * possibly deallocating that block, etc...
+ *
+ * Starting at the deepest active block of state->path, each level is
+ * asked whether it is too small; if so it is merged into a neighbour
+ * (or found empty), unlinked from its siblings and its block is freed.
+ * The walk returns 0 as soon as a level reports nothing to do, and
+ * otherwise continues until only the root is left in the path, which
+ * is then handed to xfs_da_root_join().
+ *
+ * Returns 0 on success, otherwise the error of the failing callee.
+ * When built without __KERNEL__, an attribute leaf yields ENOTTY.
+ */
+int
+xfs_da_join(xfs_da_state_t *state)
+{
+       xfs_da_state_blk_t *drop_blk, *save_blk;
+       int action, error;
+
+       action = 0;
+       /* drop_blk walks path, save_blk walks altpath, at the same depth. */
+       drop_blk = &state->path.blk[ state->path.active-1 ];
+       save_blk = &state->altpath.blk[ state->path.active-1 ];
+       ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
+       ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
+              drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
+
+       /*
+        * Walk back up the tree joining/deallocating as necessary.
+        * When we stop dropping blocks, break out.
+        */
+       for (  ; state->path.active >= 2; drop_blk--, save_blk--,
+                state->path.active--) {
+               /*
+                * See if we can combine the block with a neighbor.
+                *   (action == 0) => no options, just leave
+                *   (action == 1) => coalesce, then unlink
+                *   (action == 2) => block empty, unlink it
+                */
+               switch (drop_blk->magic) {
+               case XFS_ATTR_LEAF_MAGIC:
+#ifndef __KERNEL__
+                       /* Not available in this build; returned just below. */
+                       error = ENOTTY;
+#else
+                       error = xfs_attr_leaf_toosmall(state, &action);
+#endif
+                       if (error)
+                               return(error);
+                       if (action == 0)
+                               return(0);
+#ifdef __KERNEL__
+                       xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
+#endif
+                       break;
+               case XFS_DIR_LEAF_MAGIC:
+                       ASSERT(XFS_DIR_IS_V1(state->mp));
+                       error = xfs_dir_leaf_toosmall(state, &action);
+                       if (error)
+                               return(error);
+                       if (action == 0)
+                               return(0);
+                       xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+                       ASSERT(XFS_DIR_IS_V2(state->mp));
+                       error = xfs_dir2_leafn_toosmall(state, &action);
+                       if (error)
+                               return error;
+                       if (action == 0)
+                               return 0;
+                       xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
+                       break;
+               case XFS_DA_NODE_MAGIC:
+                       /*
+                        * Remove the offending node, fixup hashvals,
+                        * check for a toosmall neighbor.
+                        */
+                       xfs_da_node_remove(state, drop_blk);
+                       xfs_da_fixhashpath(state, &state->path);
+                       error = xfs_da_node_toosmall(state, &action);
+                       if (error)
+                               return(error);
+                       if (action == 0)
+                               return 0;
+                       xfs_da_node_unbalance(state, drop_blk, save_blk);
+                       break;
+               }
+               /*
+                * The block is going away: fix hashvals along altpath,
+                * unlink it from its siblings and free its disk block.
+                * altpath is killed before the unlink error is checked,
+                * so it is torn down on failure as well.
+                */
+               xfs_da_fixhashpath(state, &state->altpath);
+               error = xfs_da_blk_unlink(state, drop_blk, save_blk);
+               xfs_da_state_kill_altpath(state);
+               if (error)
+                       return(error);
+               error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
+                                                        drop_blk->bp);
+               /* The buffer pointer is cleared whether or not that failed. */
+               drop_blk->bp = NULL;
+               if (error)
+                       return(error);
+       }
+       /*
+        * We joined all the way to the top.  If it turns out that
+        * we only have one entry in the root, make the child block
+        * the new root.
+        */
+       xfs_da_node_remove(state, drop_blk);
+       xfs_da_fixhashpath(state, &state->path);
+       error = xfs_da_root_join(state, &state->path.blk[0]);
+       return(error);
+}
+
+/*
+ * Collapse a root that is down to a single entry: the sole remaining
+ * child is read in, its contents are copied over the root block and
+ * the child's own block is then given back.
+ *
+ * Nothing is done (and 0 is returned) while the root still has more
+ * than one entry.  Otherwise the result is the error, if any, from
+ * xfs_da_read_buf() or xfs_da_shrink_inode().
+ */
+STATIC int
+xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
+{
+       xfs_da_intnode_t *rootnode;
+       /* REFERENCED */
+       xfs_da_blkinfo_t *childinfo;
+       xfs_da_args_t *args;
+       xfs_dablk_t childno;
+       xfs_dabuf_t *childbp;
+       int error;
+
+       args = state->args;
+       ASSERT(args != NULL);
+       ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
+       rootnode = root_blk->bp->data;
+       ASSERT(INT_GET(rootnode->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       ASSERT(INT_ISZERO(rootnode->hdr.info.forw, ARCH_CONVERT));
+       ASSERT(INT_ISZERO(rootnode->hdr.info.back, ARCH_CONVERT));
+
+       /*
+        * A root with several children stays as it is.
+        */
+       if (INT_GET(rootnode->hdr.count, ARCH_CONVERT) > 1)
+               return 0;
+
+       /*
+        * Fetch the one child that is left.
+        */
+       childno = INT_GET(rootnode->btree[ 0 ].before, ARCH_CONVERT);
+       ASSERT(childno != 0);
+       error = xfs_da_read_buf(args->trans, args->dp, childno, -1, &childbp,
+                                            args->whichfork);
+       if (error)
+               return error;
+       ASSERT(childbp != NULL);
+       childinfo = childbp->data;
+
+       /*
+        * Below a level 1 root the child is a leaf, otherwise a node;
+        * either way it has no siblings.
+        */
+       if (INT_GET(rootnode->hdr.level, ARCH_CONVERT) == 1) {
+               ASSERT(INT_GET(childinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+                      INT_GET(childinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+       } else {
+               ASSERT(INT_GET(childinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       }
+       ASSERT(INT_GET(childinfo->forw, ARCH_CONVERT) == 0);
+       ASSERT(INT_GET(childinfo->back, ARCH_CONVERT) == 0);
+
+       /*
+        * Overwrite the whole root block with the child, log all of it,
+        * then free the child's block.
+        */
+       bcopy(childbp->data, root_blk->bp->data, state->blocksize);
+       xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+       return xfs_da_shrink_inode(args, childno, childbp);
+}
+
+/*
+ * Check a node block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ *
+ * The verdict is handed back through *action; the return value itself
+ * is 0 unless reading a sibling or shifting a path fails, in which case
+ * that error is returned and *action is not written.
+ * If the current block is over 50% full, don't try to join it, *action = 0.
+ * If the block is empty, fill in the state structure and set *action = 2.
+ * If it can be collapsed, fill in the state structure and set *action = 1.
+ * If nothing can be done, set *action = 0.
+ */
+STATIC int
+xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
+{
+       xfs_da_intnode_t *node;
+       xfs_da_state_blk_t *blk;
+       xfs_da_blkinfo_t *info;
+       int count, forward, error, retval, i;
+       xfs_dablk_t blkno;
+       xfs_dabuf_t *bp;
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];  /* bottom block of the path */
+       info = blk->bp->data;
+       ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       node = (xfs_da_intnode_t *)info;
+       count = INT_GET(node->hdr.count, ARCH_CONVERT);
+       if (count > (XFS_DA_NODE_ENTRIES(state->mp) >> 1)) {
+               *action = 0;    /* blk over 50%, dont try to join */
+               return(0);      /* blk over 50%, dont try to join */
+       }
+
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = (!INT_ISZERO(info->forw, ARCH_CONVERT));
+               bcopy(&state->path, &state->altpath, sizeof(state->path));
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error)
+                       return(error);
+               if (retval) {   /* the shift found no block in that direction */
+                       *action = 0;
+               } else {
+                       *action = 2;
+               }
+               return(0);
+       }
+
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink a directory over time.
+        *
+        * Inside the loop, count is recomputed as the node capacity minus
+        * a quarter of that capacity, minus the entries in this block,
+        * minus the entries in the sibling; a result >= 0 means the merge
+        * fits.  The loop tries at most two directions, flipping forward
+        * between iterations, and leaves i < 2 only when it breaks out.
+        */
+       /* start with smaller blk num */
+       forward = (INT_GET(info->forw, ARCH_CONVERT)
+                               < INT_GET(info->back, ARCH_CONVERT));
+       for (i = 0; i < 2; forward = !forward, i++) {
+               if (forward)
+                       blkno = INT_GET(info->forw, ARCH_CONVERT);
+               else
+                       blkno = INT_GET(info->back, ARCH_CONVERT);
+               if (blkno == 0) /* no sibling recorded on this side */
+                       continue;
+               error = xfs_da_read_buf(state->args->trans, state->args->dp,
+                                       blkno, -1, &bp, state->args->whichfork);
+               if (error)
+                       return(error);
+               ASSERT(bp != NULL);
+
+               node = (xfs_da_intnode_t *)info;        /* the block being checked */
+               count  = XFS_DA_NODE_ENTRIES(state->mp);
+               count -= XFS_DA_NODE_ENTRIES(state->mp) >> 2;
+               count -= INT_GET(node->hdr.count, ARCH_CONVERT);
+               node = bp->data;        /* the sibling just read */
+               ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+               count -= INT_GET(node->hdr.count, ARCH_CONVERT);
+               xfs_da_brelse(state->args->trans, bp);
+               if (count >= 0)
+                       break;  /* fits with at least 25% to spare */
+       }
+       if (i >= 2) {   /* neither direction produced a sibling that fits */
+               *action = 0;
+               return(0);
+       }
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        *
+        * If the chosen sibling has the lower block number, altpath is
+        * shifted onto it and path stays on this block.  Otherwise path
+        * is shifted onto the sibling and altpath (the copy made just
+        * below) stays on this block.
+        */
+       bcopy(&state->path, &state->altpath, sizeof(state->path));
+       if (blkno < blk->blkno) {
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error) {
+                       return(error);
+               }
+               if (retval) {
+                       *action = 0;
+                       return(0);
+               }
+       } else {
+               error = xfs_da_path_shift(state, &state->path, forward,
+                                                0, &retval);
+               if (error) {
+                       return(error);
+               }
+               if (retval) {
+                       *action = 0;
+                       return(0);
+               }
+       }
+       *action = 1;
+       return(0);
+}
+
+
+/*
+ * Walk back up the tree adjusting hash values as necessary,
+ * when we stop making changes, return.
+ *
+ * The starting value is the last hash of the bottom block in "path",
+ * obtained from the lasthash routine matching blk->magic; if that
+ * block reports zero entries nothing is changed.  Each higher level
+ * then has the entry at blk->index rewritten (and logged) with the
+ * hash coming up from below, and the walk stops at the first level
+ * whose entry already holds that value.
+ *
+ * NOTE(review): the switch has no default case, so lasthash and count
+ * are left unset if blk->magic matches none of the cases (the attr
+ * leaf case is only compiled under __KERNEL__) -- confirm callers
+ * never pass such a path.
+ */
+void
+xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
+{
+       xfs_da_state_blk_t *blk;
+       xfs_da_intnode_t *node;
+       xfs_da_node_entry_t *btree;
+       xfs_dahash_t lasthash;
+       int level, count;
+
+       level = path->active-1;
+       blk = &path->blk[ level ];      /* bottom block of the path */
+       switch (blk->magic) {
+#ifdef __KERNEL__
+       case XFS_ATTR_LEAF_MAGIC:
+               lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+#endif
+       case XFS_DIR_LEAF_MAGIC:
+               ASSERT(XFS_DIR_IS_V1(state->mp));
+               lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       case XFS_DIR2_LEAFN_MAGIC:
+               ASSERT(XFS_DIR_IS_V2(state->mp));
+               lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       case XFS_DA_NODE_MAGIC:
+               lasthash = xfs_da_node_lasthash(blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       }
+       for (blk--, level--; level >= 0; blk--, level--) {
+               node = blk->bp->data;
+               ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+               btree = &node->btree[ blk->index ];
+               if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
+                       break;  /* already correct here: stop walking up */
+               blk->hashval = lasthash;
+                INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
+               xfs_da_log_buf(state->args->trans, blk->bp,
+                                 XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
+
+               lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);     /* this node's last hash, to compare against the next level up */
+       }
+}
+
+
+
+/*
+ * Remove an entry from an intermediate node.
+ *
+ * The entry at drop_blk->index is removed from the node held in
+ * drop_blk->bp: any entries after it are slid down one slot, the
+ * vacated last slot is zeroed, and the header count is decremented.
+ * Each modified range is logged against state->args->trans.  On
+ * return drop_blk->hashval holds the hashval read from the slot just
+ * before the zeroed one.
+ *
+ * NOTE(review): if the node held exactly one entry, the final btree--
+ * steps to the slot before btree[0], so the hashval read there is not
+ * an entry -- confirm callers never rely on it in that case.
+ */
+STATIC void
+xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
+{
+       xfs_da_intnode_t *node;
+       xfs_da_node_entry_t *btree;
+       int tmp;
+
+       node = drop_blk->bp->data;
+       ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
+       ASSERT(drop_blk->index >= 0);
+
+       /*
+        * Copy over the offending entry, or just zero it out.
+        * tmp is the size in bytes of the entries that follow the one
+        * being removed; after the copy, btree is moved to the last
+        * (now duplicated) slot so that it is the one zeroed below.
+        */
+       btree = &node->btree[drop_blk->index];
+       if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
+               tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
+               tmp *= (uint)sizeof(xfs_da_node_entry_t);
+               ovbcopy(btree + 1, btree, tmp);
+               xfs_da_log_buf(state->args->trans, drop_blk->bp,
+                   XFS_DA_LOGRANGE(node, btree, tmp));
+               btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
+       }
+       bzero((char *)btree, sizeof(xfs_da_node_entry_t));
+       xfs_da_log_buf(state->args->trans, drop_blk->bp,
+           XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
+       INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
+       xfs_da_log_buf(state->args->trans, drop_blk->bp,
+           XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
+
+       /*
+        * Copy the last hash value from the block to propagate upwards.
+        */
+       btree--;        /* step back from the zeroed slot to the new last entry */
+       drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
+}
+
+/*
+ * Unbalance the btree elements between two intermediate nodes,
+ * move all Btree elements from one node into another.
+ *
+ * Every entry of the node in drop_blk is copied into the node in
+ * save_blk, whose count is raised accordingly; the drop node's buffer
+ * is only read here, never written.  save_blk->hashval is refreshed
+ * from the last entry of the combined node.
+ */
+STATIC void
+xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+                                    xfs_da_state_blk_t *save_blk)
+{
+       xfs_da_intnode_t *drop_node, *save_node;
+       xfs_da_node_entry_t *btree;
+       int tmp;
+       xfs_trans_t *tp;
+
+       drop_node = drop_blk->bp->data;
+       save_node = save_blk->bp->data;
+       ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       tp = state->args->trans;
+
+       /*
+        * If the dying block has lower hashvals, then move all the
+        * elements in the remaining block up to make a hole.
+        *
+        * In that case the hole is at the front of save_node and the
+        * whole combined range is logged; otherwise the dropped entries
+        * go after save_node's existing entries and only that tail range
+        * is logged.  Either way btree is left pointing at the place the
+        * dropped entries will be copied to, and the log call is issued
+        * before the bcopy below fills that range in.
+        */
+       if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
+           (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
+            INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
+       {
+               btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
+               tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+               ovbcopy(&save_node->btree[0], btree, tmp);
+               btree = &save_node->btree[0];
+               xfs_da_log_buf(tp, save_blk->bp,
+                       XFS_DA_LOGRANGE(save_node, btree,
+                               (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
+                               sizeof(xfs_da_node_entry_t)));
+       } else {
+               btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
+               xfs_da_log_buf(tp, save_blk->bp,
+                       XFS_DA_LOGRANGE(save_node, btree,
+                               INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
+                               sizeof(xfs_da_node_entry_t)));
+       }
+
+       /*
+        * Move all the B-tree elements from drop_blk to save_blk.
+        */
+       tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
+       bcopy(&drop_node->btree[0], btree, tmp);
+       INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
+
+       xfs_da_log_buf(tp, save_blk->bp,
+               XFS_DA_LOGRANGE(save_node, &save_node->hdr,
+                       sizeof(save_node->hdr)));
+
+       /*
+        * Save the last hashval in the remaining block for upward propagation.
+        */
+       save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Walk down the Btree looking for a particular filename, filling
+ * in the state structure as we go.
+ *
+ * We will set the state structure to point to each of the elements
+ * in each of the nodes where either the hashval is or should be.
+ *
+ * We support duplicate hashval's so for each entry in the current
+ * node that could contain the desired hashval, descend.  This is a
+ * pruned depth-first tree search.
+ *
+ * The function's return value is nonzero only when reading a block or
+ * shifting the path fails.  Otherwise it returns 0 and *result holds
+ * the value produced by the leaf lookup routine (or by
+ * xfs_da_path_shift when the search ran off the end of the tree).
+ *
+ * NOTE(review): retval is assigned only in the branches for the leaf
+ * magics handled below; the attr leaf branch exists only under
+ * __KERNEL__, so without it an attr leaf would leave retval unset --
+ * confirm that case cannot be reached.
+ */
+int                                                    /* error */
+xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
+{
+       xfs_da_state_blk_t *blk;
+       xfs_da_blkinfo_t *curr;
+       xfs_da_intnode_t *node;
+       xfs_da_node_entry_t *btree;
+       xfs_dablk_t blkno;
+       int probe, span, max, error, retval;
+       xfs_dahash_t hashval;
+       xfs_da_args_t *args;
+
+       args = state->args;
+       /*
+        * Descend thru the B-tree searching each level for the right
+        * node to use, until the right hashval is found.
+        * The starting block is m_dirleafblk for a version 2 directory's
+        * data fork and block 0 otherwise.
+        */
+       if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
+               blkno = state->mp->m_dirleafblk;
+       else
+               blkno = 0;
+       for (blk = &state->path.blk[0], state->path.active = 1;
+                        state->path.active <= XFS_DA_NODE_MAXDEPTH;
+                        blk++, state->path.active++) {
+               /*
+                * Read the next node down in the tree.
+                */
+               blk->blkno = blkno;
+               error = xfs_da_read_buf(state->args->trans, state->args->dp,
+                                       blkno, -1, &blk->bp,
+                                       state->args->whichfork);
+               if (error) {
+                       blk->blkno = 0;
+                       state->path.active--;   /* this level was not filled in */
+                       return(error);
+               }
+               ASSERT(blk->bp != NULL);
+               curr = blk->bp->data;
+               ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
+                      INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+                      INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+
+               /*
+                * Search an intermediate node for a match.
+                */
+               blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
+               if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+                       node = blk->bp->data;
+                       blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+                       /*
+                        * Binary search.  (note: small blocks will skip loop)
+                        * The loop only narrows probe while span > 4 (or
+                        * until an exact match); the two linear scans
+                        * further down settle the final position.
+                        */
+                       max = INT_GET(node->hdr.count, ARCH_CONVERT);
+                       probe = span = max / 2;
+                       hashval = state->args->hashval;
+                       for (btree = &node->btree[probe]; span > 4;
+                                  btree = &node->btree[probe]) {
+                               span /= 2;
+                               if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
+                                       probe += span;
+                               else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
+                                       probe -= span;
+                               else
+                                       break;
+                       }
+                       ASSERT((probe >= 0) && (probe < max));
+                       ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
+
+                       /*
+                        * Since we may have duplicate hashval's, find the first
+                        * matching hashval in the node.
+                        */
+                       while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
+                               btree--;
+                               probe--;
+                       }
+                       while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
+                               btree++;
+                               probe++;
+                       }
+
+                       /*
+                        * Pick the right block to descend on.
+                        * probe == max means every entry's hashval is below
+                        * the one wanted, so the last entry is used.
+                        */
+                       if (probe == max) {
+                               blk->index = max-1;
+                               blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
+                       } else {
+                               blk->index = probe;
+                               blkno = INT_GET(btree->before, ARCH_CONVERT);   
+                       }
+               }
+#ifdef __KERNEL__
+               else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
+                       blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
+                       break;
+               }
+#endif
+               else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+                       blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
+                       break;
+               }
+               else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+                       blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
+                       break;
+               }
+       }
+
+       /*
+        * A leaf block that ends in the hashval that we are interested in
+        * (final hashval == search hashval) means that the next block may
+        * contain more entries with the same hashval, shift upward to the
+        * next leaf and keep searching.
+        */
+       for (;;) {
+               if (blk->magic == XFS_DIR_LEAF_MAGIC) {
+                       ASSERT(XFS_DIR_IS_V1(state->mp));
+                       retval = xfs_dir_leaf_lookup_int(blk->bp, state->args,
+                                                                 &blk->index);
+               } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
+                       ASSERT(XFS_DIR_IS_V2(state->mp));
+                       retval = xfs_dir2_leafn_lookup_int(blk->bp, state->args,
+                                                       &blk->index, state);
+               }
+#ifdef __KERNEL__
+               else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+                       retval = xfs_attr_leaf_lookup_int(blk->bp, state->args);
+                       blk->index = state->args->index;
+                       state->args->blkno = blk->blkno;
+               }
+#endif
+               if (((retval == ENOENT) || (retval == ENOATTR)) &&
+                   (blk->hashval == state->args->hashval)) {
+                       error = xfs_da_path_shift(state, &state->path, 1, 1,
+                                                        &retval);
+                       if (error)
+                               return(error);
+                       if (retval == 0) {      /* moved to the next leaf: search it too */
+                               continue;
+                       }
+#ifdef __KERNEL__
+                       else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+                               /* path_shift() gives ENOENT */
+                               retval = XFS_ERROR(ENOATTR);
+                       }
+#endif
+               }
+               break;
+       }
+       *result = retval;
+       return(0);      
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Link a new block into a doubly linked list of blocks (of whatever type).
+ *
+ * The order routine matching old_blk->magic decides whether new_blk
+ * goes before or after old_blk.  The forw/back fields of both blocks
+ * are updated, and if old_blk had a neighbor on the side where
+ * new_blk is inserted, that neighbor is read, repointed at new_blk
+ * and logged.  The headers of old_blk and new_blk are logged last.
+ * Returns 0, or the error from reading the neighbor.
+ *
+ * NOTE(review): "before" is assigned only inside the switch, which has
+ * no default case, and the attr leaf case is only compiled under
+ * __KERNEL__ -- confirm no other magic can reach this point.
+ */
+int                                                    /* error */
+xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
+                              xfs_da_state_blk_t *new_blk)
+{
+       xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
+       xfs_da_args_t *args;
+       int before, error;
+       xfs_dabuf_t *bp;
+
+       /*
+        * Set up environment.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       old_info = old_blk->bp->data;
+       new_info = new_blk->bp->data;
+       ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
+              old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+              old_blk->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
+       ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
+       ASSERT(old_blk->magic == new_blk->magic);
+
+       switch (old_blk->magic) {
+#ifdef __KERNEL__
+       case XFS_ATTR_LEAF_MAGIC:
+               before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
+               break;
+#endif
+       case XFS_DIR_LEAF_MAGIC:
+               ASSERT(XFS_DIR_IS_V1(state->mp));
+               before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
+               break;
+       case XFS_DIR2_LEAFN_MAGIC:
+               ASSERT(XFS_DIR_IS_V2(state->mp));
+               before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
+               break;
+       case XFS_DA_NODE_MAGIC:
+               before = xfs_da_node_order(old_blk->bp, new_blk->bp);
+               break;
+       }
+
+       /*
+        * Link blocks in appropriate order.
+        */
+       if (before) {
+               /*
+                * Link new block in before existing block.
+                */
+               INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
+               new_info->back = old_info->back; /* INT_: direct copy */
+               if (INT_GET(old_info->back, ARCH_CONVERT)) {
+                       error = xfs_da_read_buf(args->trans, args->dp,
+                                               INT_GET(old_info->back,
+                                                       ARCH_CONVERT), -1, &bp,
+                                               args->whichfork);
+                       if (error)
+                               return(error);
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->data;
+                       ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
+                       ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
+                       INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
+                       xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+                       xfs_da_buf_done(bp);
+               }
+               INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
+       } else {
+               /*
+                * Link new block in after existing block.
+                */
+               new_info->forw = old_info->forw; /* INT_: direct copy */
+               INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
+               if (INT_GET(old_info->forw, ARCH_CONVERT)) {
+                       error = xfs_da_read_buf(args->trans, args->dp,
+                                               INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
+                                               args->whichfork);
+                       if (error)
+                               return(error);
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->data;
+                       ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
+                                   == INT_GET(old_info->magic, ARCH_CONVERT));
+                       ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
+                                   == old_blk->blkno);
+                       INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
+                       xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+                       xfs_da_buf_done(bp);
+               }
+               INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
+       }
+
+       xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);     /* sizeof only: tmp_info is not dereferenced */
+       xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+       return(0);
+}
+
+
+/*
+ * Decide the relative "order" of two intermediate nodes.
+ *
+ * Returns 1 when both nodes hold at least one entry and either the
+ * first hashval of node2 is below the first hashval of node1, or the
+ * last hashval of node2 is below the last hashval of node1.
+ * Returns 0 in every other case.
+ */
+STATIC int
+xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
+{
+       xfs_da_intnode_t *node1 = node1_bp->data;
+       xfs_da_intnode_t *node2 = node2_bp->data;
+       int nents1, nents2;
+
+       ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
+              (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
+
+       /* An empty node on either side can never be "out of order". */
+       nents1 = INT_GET(node1->hdr.count, ARCH_CONVERT);
+       if (nents1 <= 0)
+               return(0);
+       nents2 = INT_GET(node2->hdr.count, ARCH_CONVERT);
+       if (nents2 <= 0)
+               return(0);
+
+       /* Compare the lowest hash values first ... */
+       if (INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
+           INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT))
+               return(1);
+
+       /* ... then the highest ones. */
+       if (INT_GET(node2->btree[ nents2-1 ].hashval, ARCH_CONVERT) <
+           INT_GET(node1->btree[ nents1-1 ].hashval, ARCH_CONVERT))
+               return(1);
+
+       return(0);
+}
+
+
+/*
+ * Fetch the hashval of the final entry in an intermediate node.
+ *
+ * When "count" is non-NULL the node's entry count is stored through
+ * it.  An empty node yields a hash of 0.
+ */
+STATIC uint
+xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+{
+       xfs_da_intnode_t *node = bp->data;
+       int nents;
+
+       ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+       nents = INT_GET(node->hdr.count, ARCH_CONVERT);
+       if (count)
+               *count = nents;
+       if (nents == 0)
+               return(0);
+       return(INT_GET(node->btree[ nents-1 ].hashval, ARCH_CONVERT));
+}
+
+/*
+ * Unlink a block from a doubly linked list of blocks.
+ *
+ * drop_blk is taken out of the sibling chain and save_blk, which must
+ * be its direct neighbor, inherits the link that pointed past it.
+ * If drop_blk had a neighbor on its other side, that block is read,
+ * repointed at save_blk and logged.  save_blk's header is logged last;
+ * drop_blk's own forw/back fields are not modified here.
+ * Returns 0, or the error from reading the far neighbor.
+ */
+int                                                    /* error */
+xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+                                xfs_da_state_blk_t *save_blk)
+{
+       xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
+       xfs_da_args_t *args;
+       xfs_dabuf_t *bp;
+       int error;
+
+       /*
+        * Set up environment.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       save_info = save_blk->bp->data;
+       drop_info = drop_blk->bp->data;
+       ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
+              save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+              save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
+       ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
+       ASSERT(save_blk->magic == drop_blk->magic);
+       ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
+              (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
+       ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
+              (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
+
+       /*
+        * Unlink the leaf block from the doubly linked chain of leaves.
+        * The first branch handles drop_blk sitting just behind save_blk,
+        * the second handles it sitting just ahead.
+        */
+       if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
+               save_info->back = drop_info->back; /* INT_: direct copy */
+               if (INT_GET(drop_info->back, ARCH_CONVERT)) {
+                       error = xfs_da_read_buf(args->trans, args->dp,
+                                               INT_GET(drop_info->back,
+                                                       ARCH_CONVERT), -1, &bp,
+                                               args->whichfork);
+                       if (error)
+                               return(error);
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->data;
+                       ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
+                       ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
+                       INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
+                       xfs_da_log_buf(args->trans, bp, 0,
+                                                   sizeof(*tmp_info) - 1);
+                       xfs_da_buf_done(bp);
+               }
+       } else {
+               save_info->forw = drop_info->forw; /* INT_: direct copy */
+               if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
+                       error = xfs_da_read_buf(args->trans, args->dp,
+                                               INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
+                                               args->whichfork);
+                       if (error)
+                               return(error);
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->data;
+                       ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
+                                   == INT_GET(save_info->magic, ARCH_CONVERT));
+                       ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
+                                   == drop_blk->blkno);
+                       INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
+                       xfs_da_log_buf(args->trans, bp, 0,
+                                                   sizeof(*tmp_info) - 1);
+                       xfs_da_buf_done(bp);
+               }
+       }
+
+       xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+       return(0);
+}
+
+/*
+ * Move a path "forward" or "!forward" one block at the current level.
+ *
+ * This routine will adjust a "path" to point to the next block
+ * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
+ * Btree, including updating pointers to the intermediate nodes between
+ * the new bottom and the root.
+ *
+ * If there is no block in the requested direction, *result is set to
+ * ENOENT and 0 is returned with the lower path levels left unread.
+ * On a successful shift *result is 0.  A nonzero return value is the
+ * error from reading a block, in which case *result is not written.
+ * When "release" is nonzero, each buffer being replaced in the path is
+ * released before its replacement is read.
+ */
+int                                                    /* error */
+xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
+                                int forward, int release, int *result)
+{
+       xfs_da_state_blk_t *blk;
+       xfs_da_blkinfo_t *info;
+       xfs_da_intnode_t *node;
+       xfs_da_args_t *args;
+       xfs_dablk_t blkno;
+       int level, error;
+
+       /*
+        * Roll up the Btree looking for the first block where our
+        * current index is not at the edge of the block.  Note that
+        * we skip the bottom layer because we want the sibling block.
+        * At that block the index is stepped one entry in the requested
+        * direction and blkno is set to the child that entry points at.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       ASSERT(path != NULL);
+       ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       level = (path->active-1) - 1;   /* skip bottom layer in path */
+       for (blk = &path->blk[level]; level >= 0; blk--, level--) {
+               ASSERT(blk->bp != NULL);
+               node = blk->bp->data;
+               ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+               if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
+                       blk->index++;
+                       blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+                       break;
+               } else if (!forward && (blk->index > 0)) {
+                       blk->index--;
+                       blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+                       break;
+               }
+       }
+       if (level < 0) {
+               *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
+               ASSERT(args->oknoent);
+               return(0);
+       }
+
+       /*
+        * Roll down the edge of the subtree until we reach the
+        * same depth we were at originally.
+        * Going forward the first entry of each node is followed, going
+        * backward the last one; blkno carries the next child to read.
+        */
+       for (blk++, level++; level < path->active; blk++, level++) {
+               /*
+                * Release the old block.
+                * (if it's dirty, trans won't actually let go)
+                */
+               if (release)
+                       xfs_da_brelse(args->trans, blk->bp);
+
+               /*
+                * Read the next child block.
+                */
+               blk->blkno = blkno;
+               error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
+                                                    &blk->bp, args->whichfork);
+               if (error)
+                       return(error);
+               ASSERT(blk->bp != NULL);
+               info = blk->bp->data;
+               ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
+                      INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+                      INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+               blk->magic = INT_GET(info->magic, ARCH_CONVERT);
+               if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+                       node = (xfs_da_intnode_t *)info;
+                       blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+                       if (forward)
+                               blk->index = 0;
+                       else
+                               blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
+                       blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+               } else {
+                       ASSERT(level == path->active-1);        /* a non-node block is only expected at the bottom */
+                       blk->index = 0;
+                       switch(blk->magic) {
+#ifdef __KERNEL__
+                       case XFS_ATTR_LEAF_MAGIC:
+                               blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
+                                                                     NULL);
+                               break;
+#endif
+                       case XFS_DIR_LEAF_MAGIC:
+                               ASSERT(XFS_DIR_IS_V1(state->mp));
+                               blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
+                                                                    NULL);
+                               break;
+                       case XFS_DIR2_LEAFN_MAGIC:
+                               ASSERT(XFS_DIR_IS_V2(state->mp));
+                               blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
+                                                                      NULL);
+                               break;
+                       default:
+                               ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
+                                      blk->magic ==
+                                      XFS_DIRX_LEAF_MAGIC(state->mp));
+                               break;
+                       }
+               }
+       }
+       *result = 0;
+       return(0);
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ *
+ * Every byte of the name is read as an unsigned value.  Reading through
+ * plain "char" would sign-extend bytes >= 0x80 on platforms where char is
+ * signed, which makes the hash platform dependent and left-shifts a
+ * negative value (undefined behaviour in C).  Names made only of 7-bit
+ * characters hash exactly as before.
+ *
+ * name:    bytes to hash (need not be NUL terminated)
+ * namelen: number of bytes to hash; a length <= 0 hashes to 0
+ */
+xfs_dahash_t
+xfs_da_hashname(char *name, int namelen)
+{
+       unsigned char   *p = (unsigned char *)name;
+       xfs_dahash_t    hash;
+
+#define        ROTL(x,y)       (((x) << (y)) | ((x) >> (32 - (y))))
+#ifdef SLOWVERSION
+       /*
+        * This is the old one-byte-at-a-time version.
+        */
+       for (hash = 0; namelen > 0; namelen--) {
+               hash = (xfs_dahash_t)*p++ ^ ROTL(hash, 7);
+       }
+       return(hash);
+#else
+       /*
+        * Do four characters at a time as long as we can.
+        */
+       for (hash = 0; namelen >= 4; namelen -= 4, p += 4) {
+               hash = ((xfs_dahash_t)p[0] << 21) ^
+                      ((xfs_dahash_t)p[1] << 14) ^
+                      ((xfs_dahash_t)p[2] << 7) ^
+                      ((xfs_dahash_t)p[3] << 0) ^ ROTL(hash, 7 * 4);
+       }
+       /*
+        * Now do the rest of the characters.
+        */
+       switch (namelen) {
+       case 3:
+               return ((xfs_dahash_t)p[0] << 14) ^
+                      ((xfs_dahash_t)p[1] << 7) ^
+                      ((xfs_dahash_t)p[2] << 0) ^ ROTL(hash, 7 * 3);
+       case 2:
+               return ((xfs_dahash_t)p[0] << 7) ^
+                      ((xfs_dahash_t)p[1] << 0) ^ ROTL(hash, 7 * 2);
+       case 1:
+               return ((xfs_dahash_t)p[0] << 0) ^ ROTL(hash, 7 * 1);
+       default:
+               /*
+                * Nothing left over, or a non-positive length was passed
+                * in; in the latter case hash is still 0.
+                */
+               return hash;
+       }
+#endif
+#undef ROTL
+}
+
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ *
+ * args supplies the inode (dp), the transaction (trans), the fork
+ * (whichfork) and the firstblock/total/flist values that are handed
+ * through to xfs_bmapi.
+ *
+ * Returns 0 with *new_blkno set to the file offset of the newly mapped
+ * block.  A nonzero error from the bmap calls is returned unchanged, and
+ * ENOSPC is returned when the space obtained does not exactly cover
+ * [bno, bno + count).  *new_blkno is only written on the success path
+ * (a later xfs_bmap_last_offset failure for version 1 directories can
+ * still return an error after it has been written).
+ */
+int
+xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
+{
+       xfs_fileoff_t bno, b;
+       xfs_bmbt_irec_t map;
+       xfs_bmbt_irec_t *mapp;
+       xfs_inode_t *dp;
+       int nmap, error, w, count, c, got, i, mapi;     /* mapi: valid entries in mapp[] */
+       xfs_fsize_t size;
+       xfs_trans_t *tp;
+       xfs_mount_t *mp;
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       w = args->whichfork;
+       tp = args->trans;
+       /*
+        * For new directories adjust the file offset and block count.
+        * (Version 2 directory data forks search from m_dirleafblk and
+        * need m_dirblkfsbs filesystem blocks; everything else searches
+        * from offset 0 for a single block.)
+        */
+       if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
+               bno = mp->m_dirleafblk;
+               count = mp->m_dirblkfsbs;
+       } else {
+               bno = 0;
+               count = 1;
+       }
+       /*
+        * Find a spot in the file space to put the new block.
+        */
+       if (error = xfs_bmap_first_unused(tp, dp, count, &bno, w)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+               ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
+       /*
+        * Try mapping it in one filesystem block.
+        */
+       nmap = 1;
+       ASSERT(args->firstblock != NULL);
+       if (error = xfs_bmapi(tp, dp, bno, count,
+                       XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
+                       XFS_BMAPI_CONTIG,
+                       args->firstblock, args->total, &map, &nmap,
+                       args->flist)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(nmap <= 1);
+       if (nmap == 1) {
+               mapp = &map;    /* single on-stack record, never freed */
+               mapi = 1;
+       }
+       /*
+        * If we didn't get it and the block might work if fragmented,
+        * try without the CONTIG flag.  Loop until we get it all.
+        * mapp[] is heap allocated with room for count records here, so
+        * every exit below has to free it when mapp != &map.
+        */
+       else if (nmap == 0 && count > 1) {
+#pragma mips_frequency_hint NEVER
+               mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+               for (b = bno, mapi = 0; b < bno + count; ) {
+                       nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+                       c = (int)(bno + count - b);     /* blocks still to map */
+                       if (error = xfs_bmapi(tp, dp, b, c,
+                                       XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
+                                       XFS_BMAPI_METADATA,
+                                       args->firstblock, args->total,
+                                       &mapp[mapi], &nmap, args->flist)) {
+                               kmem_free(mapp, sizeof(*mapp) * count);
+                               return error;
+                       }
+                       if (nmap < 1)
+                               break;
+                       mapi += nmap;
+                       b = mapp[mapi - 1].br_startoff +
+                           mapp[mapi - 1].br_blockcount;       /* resume after last extent */
+               }
+       } else {
+#pragma mips_frequency_hint NEVER
+               mapi = 0;
+               mapp = NULL;
+       }
+       /*
+        * Count the blocks we got, make sure it matches the total.
+        * got != count is tested first, so when nothing was mapped
+        * (got == 0, mapp possibly NULL) and count is nonzero the
+        * mapp[] entries are never dereferenced.
+        */
+       for (i = 0, got = 0; i < mapi; i++)
+               got += mapp[i].br_blockcount;
+       if (got != count || mapp[0].br_startoff != bno ||
+           mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+           bno + count) {
+#pragma mips_frequency_hint NEVER
+               if (mapp != &map)
+                       kmem_free(mapp, sizeof(*mapp) * count);
+               return XFS_ERROR(ENOSPC);
+       }
+       if (mapp != &map)
+               kmem_free(mapp, sizeof(*mapp) * count);
+       *new_blkno = (xfs_dablk_t)bno;
+       /*
+        * For version 1 directories, adjust the file size if it changed.
+        * (bno is reused below to receive the last offset of the fork.)
+        */
+       if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
+               ASSERT(mapi == 1);
+               if (error = xfs_bmap_last_offset(tp, dp, &bno, w))
+                       return error;
+               size = XFS_FSB_TO_B(mp, bno);
+               if (size != dp->i_d.di_size) {
+                       dp->i_d.di_size = size;
+                       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+               }
+       }
+       return 0;
+}
+
+
+/*
+ * Ick.  We need to always be able to remove a btree block, even
+ * if there's no space reservation because the filesystem is full.
+ * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
+ * It swaps the target block with the last block in the file.  The
+ * last block in the file can always be removed since it can't cause
+ * a bmap btree split to do that.
+ *
+ * On entry *dead_blknop and *dead_bufp name the block the caller wants
+ * to remove.  The last block's contents are copied over that block, the
+ * sibling (back/forw) and parent (before) pointers that referred to the
+ * last block are redirected to it, and on success (return 0)
+ * *dead_blknop and *dead_bufp are updated to name the last block, which
+ * is then the one to remove.
+ *
+ * On failure an error is returned (EFSCORRUPTED when the links or magic
+ * numbers found on disk are inconsistent), the buffers read here are
+ * released, and *dead_blknop / *dead_bufp are left as passed in.  Note
+ * that by then the dead buffer may already have been overwritten with
+ * the last block's contents and logged.
+ *
+ * Only the data fork is handled (asserted below).
+ */
+STATIC int
+xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
+                     xfs_dabuf_t **dead_bufp)
+{
+       xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
+       xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
+       xfs_fileoff_t lastoff;
+       xfs_inode_t *ip;
+       xfs_trans_t *tp;
+       xfs_mount_t *mp;
+       int error, w, entno, level, dead_level;
+       xfs_da_blkinfo_t *dead_info, *sib_info;
+       xfs_da_intnode_t *par_node, *dead_node;
+       xfs_dir_leafblock_t *dead_leaf;
+       xfs_dir2_leaf_t *dead_leaf2;
+       xfs_dahash_t dead_hash;
+
+       dead_buf = *dead_bufp;
+       dead_blkno = *dead_blknop;
+       tp = args->trans;
+       ip = args->dp;
+       w = args->whichfork;
+       ASSERT(w == XFS_DATA_FORK);
+       mp = ip->i_mount;
+       if (XFS_DIR_IS_V2(mp)) {
+               lastoff = mp->m_dirfreeblk;     /* search below m_dirfreeblk */
+               error = xfs_bmap_last_before(tp, ip, &lastoff, w);
+       } else
+               error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
+       if (error)
+               return error;
+       if (lastoff == 0)
+               return XFS_ERROR(EFSCORRUPTED);
+       /*
+        * Read the last block in the btree space.
+        */
+       last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
+       if (error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w))
+               return error;
+       /*
+        * Copy the last block into the dead buffer and log it.
+        * From here on dead_buf/dead_info describe the moved block.
+        */
+       bcopy(last_buf->data, dead_buf->data, mp->m_dirblksize);
+       xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+       dead_info = dead_buf->data;
+       /*
+        * Get values from the moved block.
+        * dead_level is 0 for either kind of leaf, otherwise the node's
+        * level; dead_hash is the hashval of the block's last entry.
+        */
+       if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
+               ASSERT(XFS_DIR_IS_V1(mp));
+               dead_leaf = (xfs_dir_leafblock_t *)dead_info;
+               dead_level = 0;
+               dead_hash =
+                       INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+       } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
+               ASSERT(XFS_DIR_IS_V2(mp));
+               dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
+               dead_level = 0;
+               dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+       } else {
+               ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+               dead_node = (xfs_da_intnode_t *)dead_info;
+               dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
+               dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+       }
+       sib_buf = par_buf = NULL;       /* so the done: path knows what to release */
+       /*
+        * If the moved block has a left sibling, fix up the pointers.
+        * The sibling must point forward at the old location (last_blkno)
+        * and carry the same magic, else the fork is treated as corrupt.
+        */
+       if (sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT)) {
+               if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))
+                       goto done;
+               sib_info = sib_buf->data;
+               if (INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
+                   INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT)) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
+               xfs_da_log_buf(tp, sib_buf,
+                       XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
+                                       sizeof(sib_info->forw)));
+               xfs_da_buf_done(sib_buf);
+               sib_buf = NULL;
+       }
+       /*
+        * If the moved block has a right sibling, fix up the pointers.
+        * Same checks as above, applied to the sibling's back pointer.
+        */
+       if (sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT)) {
+               if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))
+                       goto done;
+               sib_info = sib_buf->data;
+               if (   INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
+                   || INT_GET(sib_info->magic, ARCH_CONVERT)
+                               != INT_GET(dead_info->magic, ARCH_CONVERT)) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
+               xfs_da_log_buf(tp, sib_buf,
+                       XFS_DA_LOGRANGE(sib_info, &sib_info->back,
+                                       sizeof(sib_info->back)));
+               xfs_da_buf_done(sib_buf);
+               sib_buf = NULL;
+       }
+       par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;   /* first btree block (presumably the root) */
+       level = -1;     /* no node level seen yet */
+       /*
+        * Walk down the tree looking for the parent of the moved block.
+        * Every block visited must be a node whose level is exactly one
+        * less than the previous one; at each node the first entry whose
+        * hashval is >= dead_hash is followed.  The walk stops at the
+        * node one level above the moved block, with par_buf still held.
+        */
+       for (;;) {
+               if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))
+                       goto done;
+               par_node = par_buf->data;
+               if (INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
+                   (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1)) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
+               for (entno = 0;
+                    entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+                    INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
+                    entno++)
+                       continue;
+               if (entno == INT_GET(par_node->hdr.count, ARCH_CONVERT)) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
+               if (level == dead_level + 1)
+                       break;
+               xfs_da_brelse(tp, par_buf);
+               par_buf = NULL;
+       }
+       /*
+        * We're in the right parent block.
+        * Look for the right entry.
+        * Scan from entno for an entry whose "before" pointer is
+        * last_blkno; if this node has none, continue with its forward
+        * sibling, which must be a node at the same level.
+        */
+       for (;;) {
+               for (;
+                    entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
+                    INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
+                    entno++)
+                       continue;
+               if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
+                       break;
+               par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);     /* read before the buffer is released */
+               xfs_da_brelse(tp, par_buf);
+               par_buf = NULL;
+               if (par_blkno == 0) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))
+                       goto done;
+               par_node = par_buf->data;
+               if (INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
+                   INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) {
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       goto done;
+               }
+               entno = 0;
+       }
+       /*
+        * Update the parent entry pointing to the moved block.
+        * Then hand the last block back to the caller as the new block
+        * to remove.
+        */
+       INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
+       xfs_da_log_buf(tp, par_buf,
+               XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
+                               sizeof(par_node->btree[entno].before)));
+       xfs_da_buf_done(par_buf);
+       xfs_da_buf_done(dead_buf);
+       *dead_blknop = last_blkno;
+       *dead_bufp = last_buf;
+       return 0;
+done:
+       if (par_buf)
+               xfs_da_brelse(tp, par_buf);
+       if (sib_buf)
+               xfs_da_brelse(tp, sib_buf);
+       xfs_da_brelse(tp, last_buf);
+       return error;
+}
+
+/*
+ * Remove a btree block from a directory or attribute.
+ *
+ * Unmaps the block at dead_blkno (m_dirblkfsbs filesystem blocks for a
+ * version 2 directory data fork, one block otherwise) and invalidates
+ * dead_buf.  If the unmap fails with ENOSPC on the data fork, the target
+ * is swapped with the last block via xfs_da_swap_lastblock and the unmap
+ * is retried on that block instead.  For version 1 directories the inode
+ * size is brought back in line with the last mapped offset.
+ *
+ * Returns 0 on success or the error from the failing call; the buffer
+ * is invalidated on the unmap/swap failure paths as well.
+ */
+int
+xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
+                   xfs_dabuf_t *dead_buf)
+{
+       xfs_trans_t     *tp = args->trans;
+       xfs_inode_t     *dp = args->dp;
+       xfs_mount_t     *mp = dp->i_mount;
+       int             w = args->whichfork;
+       int             is_data = (w == XFS_DATA_FORK);
+       xfs_fileoff_t   last;
+       xfs_fsize_t     nbytes;
+       int             count, done, error;
+
+       count = (is_data && XFS_DIR_IS_V2(mp)) ? mp->m_dirblkfsbs : 1;
+
+       for (;;) {
+               /*
+                * Remove extents.  Leave the loop as soon as the unmap
+                * succeeds.
+                */
+               error = xfs_bunmapi(tp, dp, dead_blkno, count,
+                               XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
+                               0, args->firstblock, args->flist,
+                               &done);
+               if (!error)
+                       break;
+               /*
+                * Only ENOSPC on a directory (data fork) can be recovered
+                * from: move the last block into the place we want to
+                * kill and go around again with the last block as target.
+                */
+               if (error != ENOSPC || !is_data)
+                       goto fail;
+               error = xfs_da_swap_lastblock(args, &dead_blkno, &dead_buf);
+               if (error)
+                       goto fail;
+       }
+       ASSERT(done);
+       xfs_da_binval(tp, dead_buf);
+
+       /*
+        * Version 1 directories keep di_size equal to the mapped length.
+        */
+       if (is_data && XFS_DIR_IS_V1(mp)) {
+               error = xfs_bmap_last_offset(tp, dp, &last, w);
+               if (error)
+                       return error;
+               nbytes = XFS_FSB_TO_B(mp, last);
+               if (dp->i_d.di_size != nbytes) {
+                       dp->i_d.di_size = nbytes;
+                       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+               }
+       }
+       return 0;
+
+fail:
+       xfs_da_binval(tp, dead_buf);
+       return error;
+}
+
+/*
+ * See if the mapping(s) for this btree block are valid, i.e.
+ * don't contain holes, are logically contiguous, and cover the whole range.
+ */
+STATIC int
+xfs_da_map_covers_blocks(
+       int             nmap,
+       xfs_bmbt_irec_t *mapp,
+       xfs_dablk_t     bno,
+       int             count)
+{
+       int             i;
+       xfs_fileoff_t   off;
+
+       for (i = 0, off = bno; i < nmap; i++) {
+               if (mapp[i].br_startblock == HOLESTARTBLOCK ||
+                   mapp[i].br_startblock == DELAYSTARTBLOCK) {
+#pragma mips_frequency_hint NEVER
+                       return 0;
+               }
+               if (off != mapp[i].br_startoff) {
+#pragma mips_frequency_hint NEVER
+                       return 0;
+               }
+               off += mapp[i].br_blockcount;
+       }
+       return off == bno + count;
+}
+
+/*
+ * Make a dabuf.
+ * Used for get_buf, read_buf, read_bufr, and reada_buf.
+ *
+ * The "caller" argument selects the behaviour:
+ *   0  get a buffer without reading it (xfs_trans_get_buf); this is
+ *      what xfs_da_get_buf passes
+ *   1  read the buffer (xfs_trans_read_buf), tag it with a dir/attr
+ *      btree reference type and check its magic number; this is what
+ *      xfs_da_read_buf passes
+ *   2  handled like 1 for the read itself, but only when __KERNEL__ is
+ *      not defined, and without the tagging or magic check (presumably
+ *      the read_bufr case - confirm against its caller)
+ *   3  start readahead only (xfs_baread); compiled only for __KERNEL__
+ *      and never produces a dabuf
+ *
+ * *mappedbnop is the disk address of the block if the caller already
+ * knows it; -1 or -2 means look the mapping up here, with -2 turning
+ * "the block is not fully mapped" into a quiet success (return 0 with
+ * *bpp set to NULL) instead of EFSCORRUPTED.  When buffers are set up,
+ * *mappedbnop is overwritten with the disk address of the first mapping.
+ *
+ * Returns 0 and stores the new dabuf (or NULL when none was built) in
+ * *bpp if bpp is non-NULL; on error returns the error and stores NULL.
+ * "ra" is passed through to xfs_da_buf_make.
+ */
+STATIC int
+xfs_da_do_buf(
+       xfs_trans_t     *trans,
+       xfs_inode_t     *dp,
+       xfs_dablk_t     bno,
+       xfs_daddr_t     *mappedbnop,
+       xfs_dabuf_t     **bpp,
+       int             whichfork,
+       int             caller,
+       inst_t          *ra)
+{
+       xfs_buf_t               *bp = 0;
+       xfs_buf_t               **bplist;
+       int             error;
+       int             i;
+       xfs_bmbt_irec_t map;
+       xfs_bmbt_irec_t *mapp;
+       xfs_daddr_t     mappedbno;
+       xfs_mount_t     *mp;
+       int             nbplist;
+       int             nfsb;
+       int             nmap;
+       xfs_dabuf_t     *rbp;
+
+       mp = dp->i_mount;
+       if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
+               nfsb = mp->m_dirblkfsbs;        /* fs blocks per directory block */
+       else
+               nfsb = 1;
+       mappedbno = *mappedbnop;
+       /*
+        * Caller doesn't have a mapping.  -2 means don't complain
+        * if we land in a hole.
+        */
+       if (mappedbno == -1 || mappedbno == -2) {
+               /*
+                * Optimize the one-block case.
+                * The on-stack record "map" is used, so nothing needs
+                * freeing on this path.
+                */
+               if (nfsb == 1) {
+                       xfs_fsblock_t   fsb;
+
+                       if (error =
+                           xfs_bmapi_single(trans, dp, whichfork, &fsb,
+                                   (xfs_fileoff_t)bno)) {
+#pragma mips_frequency_hint NEVER
+                               return error;
+                       }
+                       mapp = &map;
+                       if (fsb == NULLFSBLOCK) {
+#pragma mips_frequency_hint NEVER
+                               nmap = 0;       /* nothing mapped at bno */
+                       } else {
+                               map.br_startblock = fsb;
+                               map.br_startoff = (xfs_fileoff_t)bno;
+                               map.br_blockcount = 1;
+                               nmap = 1;
+                       }
+               } else {
+#pragma mips_frequency_hint NEVER
+                       xfs_fsblock_t   firstblock;
+
+                       firstblock = NULLFSBLOCK;
+                       mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);      /* freed at exit0 or before return */
+                       nmap = nfsb;
+                       if (error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
+                                       nfsb,
+                                       XFS_BMAPI_METADATA |
+                                               XFS_BMAPI_AFLAG(whichfork),
+                                       &firstblock, 0, mapp, &nmap, NULL))
+                               goto exit0;
+               }
+       } else {
+               map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+               map.br_startoff = (xfs_fileoff_t)bno;
+               map.br_blockcount = nfsb;
+               mapp = &map;
+               nmap = 1;
+       }
+       if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
+#pragma mips_frequency_hint NEVER
+               error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
+               goto exit0;
+       }
+       if (caller != 3 && nmap > 1) {
+#pragma mips_frequency_hint NEVER
+               bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
+               nbplist = 0;
+       } else
+               bplist = NULL;  /* nbplist stays unset; only used when bplist != NULL */
+       /*
+        * Turn the mapping(s) into buffer(s).
+        */
+       for (i = 0; i < nmap; i++) {
+               int     nmapped;
+
+               mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
+               if (i == 0)
+                       *mappedbnop = mappedbno;
+               nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
+               switch (caller) {       /* NOTE(review): no default case; error is not assigned for a caller value not compiled in */
+               case 0:
+                       bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
+                               mappedbno, nmapped, 0);
+                       error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
+                       break;
+               case 1:
+#ifndef __KERNEL__
+               case 2:
+#endif
+                       bp = NULL;
+                       error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
+                               mappedbno, nmapped, 0, &bp);
+                       break;
+#ifdef __KERNEL__
+               case 3:
+                       xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
+                       error = 0;
+                       bp = NULL;
+                       break;
+#endif
+               }
+               if (error) {
+#pragma mips_frequency_hint NEVER
+                       if (bp)
+                               xfs_trans_brelse(trans, bp);
+                       goto exit1;
+               }
+               if (!bp)
+                       continue;
+               if (caller == 1) {
+                       if (whichfork == XFS_ATTR_FORK) {
+                               XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, 
+                                               XFS_ATTR_BTREE_REF);
+                       } else {
+                               XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
+                                               XFS_DIR_BTREE_REF);
+                       }
+               }
+               if (bplist) {
+#pragma mips_frequency_hint NEVER
+                       bplist[nbplist++] = bp;
+               }
+       }
+       /*
+        * Build a dabuf structure.
+        * With several mappings the collected bplist is used; with one
+        * mapping the single bp is used; with no buffer at all (the
+        * readahead case) no dabuf is made.
+        */
+       if (bplist) {
+#pragma mips_frequency_hint NEVER
+               rbp = xfs_da_buf_make(nbplist, bplist, ra);
+       } else if (bp)
+               rbp = xfs_da_buf_make(1, &bp, ra);
+       else
+               rbp = NULL;
+       /*
+        * For read_buf, check the magic number.
+        * The block must carry one of the known dir/attr magic values;
+        * XFS_TEST_ERROR also takes an error tag, presumably so that the
+        * failure can be injected for testing - confirm.
+        * NOTE(review): rbp is dereferenced without a NULL check, so this
+        * relies on a successful read always having produced a buffer.
+        */
+       if (caller == 1) {
+               xfs_dir2_data_t         *data;
+               xfs_dir2_free_t         *free;
+               xfs_da_blkinfo_t        *info;
+
+               info = rbp->data;
+               data = rbp->data;
+               free = rbp->data;
+               if (XFS_TEST_ERROR((INT_GET(info->magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) &&
+                                  (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) &&
+                                  (INT_GET(info->magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC) &&
+                                  (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC) &&
+                                  (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) &&
+                                  (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) &&
+                                  (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) &&
+                                  (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
+                               mp, XFS_ERRTAG_DA_READ_BUF,
+                               XFS_RANDOM_DA_READ_BUF)) {
+#pragma mips_frequency_hint NEVER
+                       xfs_buftrace("DA READ ERROR", rbp->bps[0]);
+                       error = XFS_ERROR(EFSCORRUPTED);
+                       xfs_da_brelse(trans, rbp);
+                       nbplist = 0;    /* buffers already released above; exit1 must not release them again */
+                       goto exit1;
+               }
+       }
+       if (bplist) {
+#pragma mips_frequency_hint NEVER
+               kmem_free(bplist, sizeof(*bplist) * nmap);
+       }
+       if (mapp != &map) {
+#pragma mips_frequency_hint NEVER
+               kmem_free(mapp, sizeof(*mapp) * nfsb);
+       }
+       if (bpp)
+               *bpp = rbp;
+       return 0;
+exit1:  /* error after buffers may have been collected: release them and the list */
+       if (bplist) {
+               for (i = 0; i < nbplist; i++)
+                       xfs_trans_brelse(trans, bplist[i]);
+               kmem_free(bplist, sizeof(*bplist) * nmap);
+       }
+exit0:  /* free a heap-allocated mapping array and report "no buffer" */
+       if (mapp != &map)
+               kmem_free(mapp, sizeof(*mapp) * nfsb);
+       if (bpp)
+               *bpp = NULL;
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block.
+ *
+ * Thin front end for xfs_da_do_buf in its "get" mode (caller code 0).
+ * mappedbno is passed by value here, so any update xfs_da_do_buf makes
+ * to it is not seen by the caller.  Returns whatever xfs_da_do_buf
+ * returns; the dabuf, if any, is stored through bpp.
+ */
+int
+xfs_da_get_buf(
+       xfs_trans_t     *trans,
+       xfs_inode_t     *dp,
+       xfs_dablk_t     bno,
+       xfs_daddr_t             mappedbno,
+       xfs_dabuf_t     **bpp,
+       int             whichfork)
+{
+       inst_t  *ra = (inst_t *)__return_address;
+       int     error;
+
+       error = xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork,
+                             0, ra);
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ *
+ * Thin front end for xfs_da_do_buf in its "read" mode (caller code 1).
+ * mappedbno is passed by value here, so any update xfs_da_do_buf makes
+ * to it is not seen by the caller.  Returns whatever xfs_da_do_buf
+ * returns; the dabuf, if any, is stored through bpp.
+ */
+int
+xfs_da_read_buf(
+       xfs_trans_t     *trans,
+       xfs_inode_t     *dp,
+       xfs_dablk_t     bno,
+       xfs_daddr_t             mappedbno,
+       xfs_dabuf_t     **bpp,
+       int             whichfork)
+{
+       inst_t  *ra = (inst_t *)__return_address;
+       int     error;
+
+       error = xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork,
+                             1, ra);
+       return error;
+}
+
+/*
+ * Calculate the number of bits needed to hold i different values.
+ *
+ * Returns the smallest rval for which 2^rval >= i, i.e. log2(i) rounded
+ * up; both i == 0 and i == 1 give 0.  If i is larger than the biggest
+ * power of two a uint can hold, the bit width of uint is returned.
+ *
+ * The probe value is built with an unsigned 1 so that testing the top
+ * bit does not shift a signed int into its sign bit (undefined in C).
+ */
+uint
+xfs_da_log2_roundup(uint i)
+{
+       uint rval;
+
+       for (rval = 0; rval < NBBY * sizeof(i); rval++) {
+               if ((1U << rval) >= i)
+                       break;
+       }
+       return(rval);
+}
+
+xfs_zone_t *xfs_da_state_zone; /* zone that xfs_da_state_t structures are allocated from and freed to */
+xfs_zone_t *xfs_dabuf_zone;            /* zone used for dabufs that wrap a single buffer (nbuf == 1) */
+
+/*
+ * Allocate a dir-state structure.
+ * These are large, so they come from xfs_da_state_zone rather than
+ * living on the stack.  The allocation uses the zeroing zone allocator
+ * with KM_SLEEP.
+ */
+xfs_da_state_t *
+xfs_da_state_alloc(void)
+{
+       xfs_da_state_t  *state;
+
+       state = kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
+       return state;
+}
+
+/*
+ * Kill the altpath contents of a da-state structure.
+ *
+ * Every active altpath level that still has a buffer gets its pointer
+ * cleared; the dabuf itself is released with xfs_da_buf_done unless the
+ * same dabuf is also held at that level of the main path.  The altpath
+ * is then marked empty.
+ */
+void
+xfs_da_state_kill_altpath(xfs_da_state_t *state)
+{
+       int     level;
+
+       for (level = 0; level < state->altpath.active; level++) {
+               xfs_dabuf_t     *altbp = state->altpath.blk[level].bp;
+
+               if (altbp == NULL)
+                       continue;
+               /* a dabuf shared with the main path is not released here */
+               if (altbp != state->path.blk[level].bp)
+                       xfs_da_buf_done(altbp);
+               state->altpath.blk[level].bp = NULL;
+       }
+       state->altpath.active = 0;
+}
+
+/*
+ * Free a da-state structure.
+ */
+void
+xfs_da_state_free(xfs_da_state_t *state)
+{
+       int     i;
+
+       xfs_da_state_kill_altpath(state);
+       for (i = 0; i < state->path.active; i++) {
+               if (state->path.blk[i].bp)
+                       xfs_da_buf_done(state->path.blk[i].bp);
+       }
+       if (state->extravalid && state->extrablk.bp)
+               xfs_da_buf_done(state->extrablk.bp);
+#ifdef DEBUG
+       bzero((char *)state, sizeof(*state));
+#endif /* DEBUG */
+       kmem_zone_free(xfs_da_state_zone, state);
+}
+
+#ifdef XFS_DABUF_DEBUG
+xfs_dabuf_t    *xfs_dabuf_global_list; /* head of the doubly linked list of live dabufs */
+lock_t         xfs_dabuf_global_lock;  /* spinlock taken around changes to xfs_dabuf_global_list */
+#endif
+
+/*
+ * Create a dabuf.
+ *
+ * Wraps the nbuf buffers in bps[] in a newly allocated dabuf:
+ *  - nbuf == 1: the dabuf comes from xfs_dabuf_zone and its data
+ *    pointer refers directly to the buffer's own memory.
+ *  - nbuf > 1: the dabuf is kmem_alloc'ed with XFS_DA_BUF_SIZE(nbuf)
+ *    bytes, bbcount is the sum of the buffers' sizes, and data is a
+ *    separately allocated area into which the buffers' contents are
+ *    copied back to back, in bps[] order.
+ * The dabuf starts out clean (dirty == 0).
+ *
+ * ra is only recorded when XFS_DABUF_DEBUG is defined; in that case the
+ * new dabuf is also put at the head of xfs_dabuf_global_list, after
+ * asserting that no listed dabuf has the same device and block number.
+ *
+ * Returns the new dabuf.
+ */
+/* ARGSUSED */
+STATIC xfs_dabuf_t *
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
+{
+       xfs_buf_t               *bp;
+       xfs_dabuf_t     *dabuf;
+       int             i;
+       int             off;
+
+       if (nbuf == 1)
+               dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
+       else
+               dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
+       dabuf->dirty = 0;
+#ifdef XFS_DABUF_DEBUG
+       dabuf->ra = ra;
+       dabuf->dev = XFS_BUF_TARGET(bps[0]);
+       dabuf->blkno = XFS_BUF_ADDR(bps[0]);
+#endif
+       if (nbuf == 1) {
+               dabuf->nbuf = 1;
+               bp = bps[0];
+               dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
+               dabuf->data = XFS_BUF_PTR(bp);  /* no copy: data aliases the buffer */
+               dabuf->bps[0] = bp;
+       } else {
+               dabuf->nbuf = nbuf;
+               for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
+                       dabuf->bps[i] = bp = bps[i];
+                       dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
+               }
+               dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
+               for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {    /* bp is set in the body before the increment uses it */
+                       bp = bps[i];
+                       bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+                               XFS_BUF_COUNT(bp));
+               }
+       }
+#ifdef XFS_DABUF_DEBUG
+       {
+               int             s;
+               xfs_dabuf_t     *p;
+
+               s = mutex_spinlock(&xfs_dabuf_global_lock);
+               for (p = xfs_dabuf_global_list; p; p = p->next) {
+                       ASSERT(p->blkno != dabuf->blkno ||
+                              p->dev != dabuf->dev);
+               }
+               dabuf->prev = NULL;
+               if (xfs_dabuf_global_list)
+                       xfs_dabuf_global_list->prev = dabuf;
+               dabuf->next = xfs_dabuf_global_list;
+               xfs_dabuf_global_list = dabuf;
+               mutex_spinunlock(&xfs_dabuf_global_lock, s);
+       }
+#endif
+       return dabuf;
+}
+
+/*
+ * Un-dirty a dabuf.
+ *
+ * If the dabuf is marked dirty, clear the mark and copy its contiguous
+ * data area back out to the individual buffers, each receiving its own
+ * XFS_BUF_COUNT bytes in bps[] order.  A dirty dabuf is expected to be
+ * a multi-buffer one (asserted).  Does nothing for a clean dabuf.
+ */
+STATIC void
+xfs_da_buf_clean(xfs_dabuf_t *dabuf)
+{
+       xfs_buf_t       *bp;
+       int             n;
+       int             pos;
+
+       if (!dabuf->dirty)
+               return;
+
+       ASSERT(dabuf->nbuf > 1);
+       dabuf->dirty = 0;
+       pos = 0;
+       for (n = 0; n < dabuf->nbuf; n++) {
+               bp = dabuf->bps[n];
+               bcopy((char *)dabuf->data + pos, XFS_BUF_PTR(bp),
+                       XFS_BUF_COUNT(bp));
+               pos += XFS_BUF_COUNT(bp);
+       }
+}
+
+/*
+ * Release a dabuf.
+ */
+void
+xfs_da_buf_done(xfs_dabuf_t *dabuf)
+{
+        ASSERT(dabuf);
+       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+       if (dabuf->dirty)
+               xfs_da_buf_clean(dabuf);
+       if (dabuf->nbuf > 1)
+               kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+#ifdef XFS_DABUF_DEBUG
+       {
+               int     s;
+
+               s = mutex_spinlock(&xfs_dabuf_global_lock);
+               if (dabuf->prev)
+                       dabuf->prev->next = dabuf->next;
+               else
+                       xfs_dabuf_global_list = dabuf->next;
+               if (dabuf->next)
+                       dabuf->next->prev = dabuf->prev;
+               mutex_spinunlock(&xfs_dabuf_global_lock, s);
+       }
+       bzero(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+#endif
+       if (dabuf->nbuf == 1)
+               kmem_zone_free(xfs_dabuf_zone, dabuf);
+       else
+               kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+}
+
+/*
+ * Log the byte range [first, last] of a dabuf in a transaction.
+ *
+ * first and last are inclusive byte offsets into dabuf->data.  For a
+ * single-buffer dabuf the range is logged directly against that buffer.
+ * For a multi-buffer dabuf the dabuf is marked dirty and the range is
+ * clipped against each underlying buffer in turn; the overlapping part is
+ * logged with offsets relative to the start of that buffer.
+ */
+void
+xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
+{
+       xfs_buf_t       *bp;
+       uint    f;      /* first byte to log, as an offset into dabuf->data */
+       int     i;
+       uint    l;      /* last byte to log, as an offset into dabuf->data */
+       int     off;    /* offset of the current buffer within dabuf->data */
+
+       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+       if (dabuf->nbuf == 1) {
+               ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
+               xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
+               return;
+       }
+       dabuf->dirty = 1;
+       ASSERT(first <= last);
+       for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
+               bp = dabuf->bps[i];
+               f = off;
+               l = f + XFS_BUF_COUNT(bp) - 1;
+               if (f < first)
+                       f = first;
+               if (l > last)
+                       l = last;
+               if (f <= l)     /* the range overlaps this buffer */
+                       xfs_trans_log_buf(tp, bp, f - off, l - off);
+               /*
+                * B_DONE is set by xfs_trans_log_buf.
+                * If we don't set it on a new buffer (obtained by get,
+                * not read) and nothing in this buffer gets logged, it
+                * won't be set; at commit the buffer is released into
+                * the cache, and then a read will fail.
+                */
+               else if (!(XFS_BUF_ISDONE(bp)))
+                 XFS_BUF_DONE(bp);
+       }
+       ASSERT(last < off);     /* the range must end inside the dabuf */
+}
+
+/*
+ * Release dabuf from a transaction.
+ * The dabuf has to be freed before its buffers are released, since the
+ * synchronization on the dabuf is really the lock on the buffer.  The
+ * buffer pointers are therefore saved first, the dabuf is torn down, and
+ * only then is each buffer released from the transaction.
+ */
+void
+xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+       xfs_buf_t       *single;        /* holds the lone buffer pointer */
+       xfs_buf_t       **bufs;         /* saved buffer pointers */
+       int             count;
+       int             n;
+
+       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+       count = dabuf->nbuf;
+       if (count == 1) {
+               /* One buffer: a stack slot is enough, no allocation. */
+               single = dabuf->bps[0];
+               bufs = &single;
+       } else {
+               bufs = kmem_alloc(count * sizeof(*bufs), KM_SLEEP);
+               bcopy(dabuf->bps, bufs, count * sizeof(*bufs));
+       }
+       xfs_da_buf_done(dabuf);
+       for (n = 0; n < count; n++)
+               xfs_trans_brelse(tp, bufs[n]);
+       if (bufs != &single)
+               kmem_free(bufs, count * sizeof(*bufs));
+}
+
+/*
+ * Invalidate dabuf from a transaction.
+ * The buffer pointers are copied aside, the dabuf is freed, and then each
+ * underlying buffer is invalidated in the transaction.
+ */
+void
+xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+       xfs_buf_t       *only;          /* holds the lone buffer pointer */
+       xfs_buf_t       **saved;        /* buffer pointers kept past the free */
+       int             nbuf;
+       int             k;
+
+       ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+       nbuf = dabuf->nbuf;
+       if (nbuf == 1) {
+               only = dabuf->bps[0];
+               saved = &only;
+       } else {
+               saved = kmem_alloc(nbuf * sizeof(*saved), KM_SLEEP);
+               bcopy(dabuf->bps, saved, nbuf * sizeof(*saved));
+       }
+       xfs_da_buf_done(dabuf);
+       k = 0;
+       while (k < nbuf) {
+               xfs_trans_binval(tp, saved[k]);
+               k++;
+       }
+       if (saved != &only)
+               kmem_free(saved, nbuf * sizeof(*saved));
+}
diff --git a/libxfs/xfs_dir.c b/libxfs/xfs_dir.c
new file mode 100644 (file)
index 0000000..b13d246
--- /dev/null
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_dir.c
+ *
+ * Provide the external interfaces to manage directories.
+ */
+
+
+/* Hash values of the names "." and "..", set by xfs_dir_startup(). */
+xfs_dahash_t   xfs_dir_hash_dot;
+xfs_dahash_t   xfs_dir_hash_dotdot;
+
+/*
+ * One-time startup routine called from xfs_init().
+ * Precomputes the hash values of the "." and ".." names.
+ */
+void
+xfs_dir_startup(void)
+{
+       xfs_dir_hash_dot = xfs_da_hashname(".", 1);
+
+       xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
+}
+
+/*
+ * Initialize directory-related fields in the mount structure for
+ * version 1 directories: the directory version, m_dircook_elog (the
+ * rounded-up log2 of one more than the largest per-directory-block entry
+ * count), the number of da node entries per block, the 37% "magic"
+ * threshold and the directory block size (one filesystem block).
+ */
+STATIC void
+xfs_dir_mount(xfs_mount_t *mp)
+{
+       uint    sf_max;         /* shortform entries fitting in m_attroffset */
+       uint    leaf_max;       /* entry + name structs fitting in a block */
+       uint    most;           /* larger of the two counts */
+
+       mp->m_dirversion = 1;
+
+       sf_max = (mp->m_attroffset - (uint)sizeof(xfs_dir_sf_hdr_t)) /
+                (uint)sizeof(xfs_dir_sf_entry_t);
+       leaf_max = (XFS_LBSIZE(mp) - (uint)sizeof(xfs_dir_leaf_hdr_t)) /
+                  ((uint)sizeof(xfs_dir_leaf_entry_t) +
+                   (uint)sizeof(xfs_dir_leaf_name_t));
+       most = sf_max;
+       if (leaf_max >= sf_max)
+               most = leaf_max;
+
+       mp->m_dircook_elog = xfs_da_log2_roundup(most + 1);
+       ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
+
+       mp->m_da_node_ents =
+               (XFS_LBSIZE(mp) - (uint)sizeof(xfs_da_node_hdr_t)) /
+               (uint)sizeof(xfs_da_node_entry_t);
+       mp->m_dir_magicpct = (XFS_LBSIZE(mp) * 37) / 100;
+
+       mp->m_dirblksize = mp->m_sb.sb_blocksize;
+       mp->m_dirblkfsbs = 1;
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries.
+ * The parent's inode number is validated first; the new directory is then
+ * created in shortform with that inode number passed as its parent.
+ * Returns 0 or an error number.
+ */
+STATIC int
+xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
+{
+       xfs_da_args_t   da_args;
+       int             err;
+
+       bzero((char *)&da_args, sizeof(da_args));
+       da_args.dp = dir;
+       da_args.trans = trans;
+
+       ASSERT((dir->i_d.di_mode & IFMT) == IFDIR);
+       err = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino);
+       if (err)
+               return err;
+
+       return xfs_dir_shortform_create(&da_args, parent_dir->i_ino);
+}
+
+/*
+ * Generic handler routine to add a name to a directory.
+ * Transitions directory from shortform to Btree as necessary.
+ *
+ * The inode number is validated first.  A local-format (shortform)
+ * directory takes the entry in place if the inode fork has room for it,
+ * otherwise it is converted to a single leaf block.  A one-block
+ * directory gets the entry in its leaf; if that reports ENOSPC the leaf
+ * is converted to a node (Btree) directory and the entry is added there.
+ * Both conversions are refused with ENOSPC when total == 0.
+ *
+ * Returns 0 or an error number.
+ */
+STATIC int                                             /* error */
+xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+                  int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+                  xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       xfs_da_args_t args;
+       int retval, newsize, done;
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+
+       if (retval = xfs_dir_ino_validate(trans->t_mountp, inum))
+               return (retval);
+
+       XFS_STATS_INC(xs_dir_create);
+       /*
+        * Fill in the arg structure for this request.
+        * Only the fields listed here are set; the structure is not zeroed.
+        */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = inum;
+       args.dp = dp;
+       args.firstblock = firstblock;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = trans;
+       args.justcheck = 0;
+       args.addname = args.oknoent = 1;
+
+       /*
+        * Decide on what work routines to call based on the inode size.
+        * "done" becomes nonzero once retval is final; while it is zero the
+        * next, larger directory format is tried.
+        */
+       done = 0;
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
+               if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) {
+                       retval = xfs_dir_shortform_addname(&args);
+                       done = 1;
+               } else {
+                       if (total == 0)
+                               return XFS_ERROR(ENOSPC);
+                       retval = xfs_dir_shortform_to_leaf(&args);
+                       done = retval != 0;     /* stop only if the conversion failed */
+               }
+       }
+       if (!done && xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+               retval = xfs_dir_leaf_addname(&args);
+               done = retval != ENOSPC;        /* anything but ENOSPC is final */
+               if (!done) {
+                       if (total == 0)
+                               return XFS_ERROR(ENOSPC);
+                       retval = xfs_dir_leaf_to_node(&args);
+                       done = retval != 0;     /* stop only if the conversion failed */
+               }
+       }
+       if (!done) {
+               retval = xfs_dir_node_addname(&args);
+       }
+       return(retval);
+}
+
+/*
+ * Generic handler routine to remove a name from a directory.
+ * Dispatches on the directory's current format (local/shortform, single
+ * block leaf, or Btree).  After a successful leaf removal the directory
+ * is converted back to shortform when the remaining entries would fit in
+ * the inode fork.  Returns 0 or an error number.
+ */
+STATIC int                                                     /* error */
+xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+                  int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock,
+                  xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       xfs_da_args_t   args;
+       int             nentries;       /* entry count left in the leaf */
+       int             namebytes;      /* name bytes left in the leaf */
+       int             sfsize;         /* size the shortform would need */
+       int             error;
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       XFS_STATS_INC(xs_dir_remove);
+
+       /* Fill in the arg structure for this request. */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = ino;
+       args.dp = dp;
+       args.firstblock = firstblock;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = trans;
+       args.oknoent = 0;
+       args.addname = 0;
+       args.justcheck = 0;
+
+       /* Local format: shortform directory. */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               return xfs_dir_shortform_removename(&args);
+
+       /* More than one block: Btree directory. */
+       if (!xfs_bmap_one_block(dp, XFS_DATA_FORK))
+               return xfs_dir_node_removename(&args);
+
+       /* Exactly one block: leaf directory, possibly shrinking. */
+       error = xfs_dir_leaf_removename(&args, &nentries, &namebytes);
+       if (error)
+               return error;
+       sfsize = XFS_DIR_SF_ALLFIT(nentries, namebytes);
+       if (sfsize <= XFS_IFORK_DSIZE(dp))
+               error = xfs_dir_leaf_to_shortform(&args);
+       return error;
+}
+
+/*
+ * Look up a name in a directory and hand back its inode number.
+ * Names of MAXNAMELEN or more bytes are rejected with EINVAL before
+ * anything else happens.  An EEXIST result from the format-specific
+ * lookup is reported as success (0).  Past the length check, *inum is
+ * always written from args.inumber, which starts out as 0.
+ */
+STATIC int                                                     /* error */
+xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+                                  xfs_ino_t *inum)
+{
+       xfs_da_args_t   args;
+       int             error;
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN)
+               return XFS_ERROR(EINVAL);
+
+       XFS_STATS_INC(xs_dir_lookup);
+
+       /* Fill in the arg structure for this request. */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = 0;
+       args.dp = dp;
+       args.firstblock = NULL;
+       args.flist = NULL;
+       args.total = 0;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = trans;
+       args.addname = 0;
+       args.justcheck = 0;
+       args.oknoent = 1;
+
+       /* Pick the work routine that matches the directory format. */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               error = xfs_dir_shortform_lookup(&args);
+       else if (xfs_bmap_one_block(dp, XFS_DATA_FORK))
+               error = xfs_dir_leaf_lookup(&args);
+       else
+               error = xfs_dir_node_lookup(&args);
+
+       *inum = args.inumber;
+       return (error == EEXIST) ? 0 : error;
+}
+
+/*
+ * Replace the inode number stored for an existing name in a directory.
+ * Names of MAXNAMELEN or more bytes are rejected with EINVAL, and the new
+ * inode number is validated, before dispatching on the directory format.
+ * Returns whatever the format-specific replace routine returns.
+ */
+STATIC int                                                     /* error */
+xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+                                   xfs_ino_t inum, xfs_fsblock_t *firstblock,
+                                   xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       xfs_da_args_t   args;
+       int             error;
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN)
+               return XFS_ERROR(EINVAL);
+
+       error = xfs_dir_ino_validate(trans->t_mountp, inum);
+       if (error)
+               return error;
+
+       /* Fill in the arg structure for this request. */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = inum;
+       args.dp = dp;
+       args.firstblock = firstblock;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = trans;
+       args.oknoent = 0;
+       args.addname = 0;
+       args.justcheck = 0;
+
+       /* Pick the work routine that matches the directory format. */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               return xfs_dir_shortform_replace(&args);
+       if (xfs_bmap_one_block(dp, XFS_DATA_FORK))
+               return xfs_dir_leaf_replace(&args);
+       return xfs_dir_node_replace(&args);
+}
+
+
+/*========================================================================
+ * External routines when dirsize == XFS_LBSIZE(dp->i_mount).
+ *========================================================================*/
+
+/*
+ * Add a name to the leaf directory structure.
+ * This is the external routine: it reads directory block 0, looks the
+ * name up, and adds it only if the lookup reports ENOENT.  Any other
+ * lookup result is returned unchanged.
+ */
+int
+xfs_dir_leaf_addname(xfs_da_args_t *args)
+{
+       xfs_dabuf_t     *leafbuf;
+       int             slot;           /* index reported by the lookup */
+       int             error;
+
+       error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbuf,
+                               XFS_DATA_FORK);
+       if (error)
+               return error;
+       ASSERT(leafbuf != NULL);
+
+       error = xfs_dir_leaf_lookup_int(leafbuf, args, &slot);
+       if (error == ENOENT)
+               error = xfs_dir_leaf_add(leafbuf, args, slot);
+
+       xfs_da_buf_done(leafbuf);
+       return error;
+}
+
+/*
+ * Remove a name from the leaf directory structure.
+ * This is the external routine.  On success (return 0) *count and
+ * *totallen receive the entry count and name byte total read from the
+ * leaf header after the removal; on any other result they are left
+ * untouched and the lookup's return value is passed back.
+ */
+STATIC int
+xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
+{
+       xfs_dir_leafblock_t     *leaf;
+       xfs_dabuf_t             *leafbuf;
+       int                     slot;
+       int                     error;
+
+       error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbuf,
+                               XFS_DATA_FORK);
+       if (error)
+               return error;
+       ASSERT(leafbuf != NULL);
+       leaf = leafbuf->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+
+       error = xfs_dir_leaf_lookup_int(leafbuf, args, &slot);
+       if (error != EEXIST) {
+               xfs_da_buf_done(leafbuf);
+               return error;
+       }
+
+       /* Found it: drop the entry and report what is left in the leaf. */
+       (void)xfs_dir_leaf_remove(args->trans, leafbuf, slot);
+       *count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       *totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+       xfs_da_buf_done(leafbuf);
+       return 0;
+}
+
+/*
+ * Look up a name in a leaf directory structure.
+ * This is the external routine: it reads directory block 0, runs the
+ * internal lookup on it, releases the buffer from the transaction and
+ * returns the lookup's result.
+ */
+STATIC int
+xfs_dir_leaf_lookup(xfs_da_args_t *args)
+{
+       xfs_dabuf_t     *leafbuf;
+       int             slot;           /* set by the lookup, unused here */
+       int             error;
+
+       error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &leafbuf,
+                               XFS_DATA_FORK);
+       if (error)
+               return error;
+       ASSERT(leafbuf != NULL);
+
+       error = xfs_dir_leaf_lookup_int(leafbuf, args, &slot);
+       xfs_da_brelse(args->trans, leafbuf);
+       return error;
+}
+
+/*
+ * Look up a name in a leaf directory structure, replace the inode number.
+ * This is the external routine.
+ *
+ * Reads directory block 0 and looks the name up.  If the lookup reports
+ * EEXIST, the inode number in the entry's name structure is overwritten
+ * with the value args->inumber had on entry, that field is logged, the
+ * dabuf is freed and 0 is returned.  Otherwise the buffer is released
+ * from the transaction and the lookup's result is returned.
+ */
+STATIC int
+xfs_dir_leaf_replace(xfs_da_args_t *args)
+{
+       int index, retval;
+       xfs_dabuf_t *bp;
+       xfs_ino_t inum;
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_entry_t *entry;
+       xfs_dir_leaf_name_t *namest;
+
+       inum = args->inumber;   /* saved before the lookup; presumably the lookup overwrites args->inumber -- confirm */
+       retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
+                                             XFS_DATA_FORK);
+       if (retval)
+               return(retval);
+       ASSERT(bp != NULL);
+       retval = xfs_dir_leaf_lookup_int(bp, args, &index);
+       if (retval == EEXIST) {
+               leaf = bp->data;
+               entry = &leaf->entries[index];
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+                /* XXX - replace assert? */
+               XFS_DIR_SF_PUT_DIRINO_ARCH(&inum, &namest->inumber, ARCH_CONVERT);
+               xfs_da_log_buf(args->trans, bp, 
+                   XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+               xfs_da_buf_done(bp);    /* frees the dabuf only; the buffer is not released here */
+               retval = 0;
+       } else
+               xfs_da_brelse(args->trans, bp);
+       return(retval);
+}
+
+
+/*========================================================================
+ * External routines when dirsize > XFS_LBSIZE(mp).
+ *========================================================================*/
+
+/*
+ * Add a name to a Btree-format directory.
+ *
+ * Walks down the Btree to the leaf where the name belongs.  If adding to
+ * that leaf succeeds, the hash values along the path are fixed up (unless
+ * this is only a check).  If it fails and args->total is nonzero, Btree
+ * elements are split as required, possibly up to and including the root
+ * node (a special case of an intermediate node).
+ */
+STATIC int
+xfs_dir_node_addname(xfs_da_args_t *args)
+{
+       xfs_da_state_t          *state;
+       xfs_da_state_blk_t      *leafblk;
+       int                     retval;
+       int                     lookup_err;
+
+       /*
+        * Fill in bucket of arguments/results/context to carry around.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_sb.sb_blocksize;
+
+       /*
+        * Search to see if name already exists, and get back a pointer
+        * to where it should go.  Anything but ENOENT ends the attempt.
+        */
+       lookup_err = xfs_da_node_lookup_int(state, &retval);
+       if (lookup_err)
+               retval = lookup_err;
+       if (retval != ENOENT)
+               goto out;
+
+       leafblk = &state->path.blk[state->path.active - 1];
+       ASSERT(leafblk->magic == XFS_DIR_LEAF_MAGIC);
+       retval = xfs_dir_leaf_add(leafblk->bp, args, leafblk->index);
+       if (retval == 0) {
+               /* Addition succeeded, update Btree hashvals. */
+               if (!args->justcheck)
+                       xfs_da_fixhashpath(state, &state->path);
+       } else if (args->total == 0) {
+               /* Addition failed and total == 0: do not try to split. */
+               ASSERT(retval == ENOSPC);
+       } else {
+               /* Addition failed, split as many Btree elements as required. */
+               retval = xfs_da_split(state);
+       }
+out:
+       xfs_da_state_free(state);
+       return retval;
+}
+
+/*
+ * Remove a name from a B-tree directory.
+ *
+ * Walks down the Btree to the leaf holding the name, removes the entry
+ * and fixes the hash values along the path.  When the leaf removal
+ * returns nonzero, a join is attempted, which may merge leaf nodes and
+ * intermediate nodes up to and including the root node.
+ * Returns the lookup result if the name was not found, otherwise 0 or
+ * the error from the join.
+ */
+STATIC int
+xfs_dir_node_removename(xfs_da_args_t *args)
+{
+       xfs_da_state_t          *state;
+       xfs_da_state_blk_t      *leafblk;
+       int                     retval;
+       int                     error;
+
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_sb.sb_blocksize;
+
+       /* Search to see if name exists, and get back a pointer to it. */
+       error = xfs_da_node_lookup_int(state, &retval);
+       if (error)
+               retval = error;
+       if (retval != EEXIST) {
+               xfs_da_state_free(state);
+               return retval;
+       }
+
+       /* Remove the name and update the hashvals in the tree. */
+       leafblk = &state->path.blk[state->path.active - 1];
+       ASSERT(leafblk->magic == XFS_DIR_LEAF_MAGIC);
+       retval = xfs_dir_leaf_remove(args->trans, leafblk->bp, leafblk->index);
+       xfs_da_fixhashpath(state, &state->path);
+
+       /* Check to see if the tree needs to be collapsed. */
+       error = retval ? xfs_da_join(state) : 0;
+
+       xfs_da_state_free(state);
+       return error;
+}
+
+/*
+ * Look up a filename in a node (Btree) directory.
+ * Use an internal routine to actually do all the work, then release
+ * every buffer on the lookup path.  Returns the lookup's result, or the
+ * error from the lookup routine itself if it failed.
+ */
+STATIC int
+xfs_dir_node_lookup(xfs_da_args_t *args)
+{
+       xfs_da_state_t          *state;
+       xfs_da_state_blk_t      *blk;
+       int                     result;
+       int                     error;
+       int                     level;
+
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_sb.sb_blocksize;
+
+       /* Search to see if name exists, and get back a pointer to it. */
+       error = xfs_da_node_lookup_int(state, &result);
+       if (error)
+               result = error;
+
+       /* If not in a transaction, we have to release all the buffers. */
+       for (level = 0; level < state->path.active; level++) {
+               blk = &state->path.blk[level];
+               xfs_da_brelse(args->trans, blk->bp);
+               blk->bp = NULL;
+       }
+
+       xfs_da_state_free(state);
+       return result;
+}
+
+/*
+ * Look up a filename in a node (Btree) directory, replace the inode number.
+ * Use an internal routine to actually do the lookup.
+ *
+ * If the lookup reports EEXIST, the inode number in the leaf entry's name
+ * structure is overwritten with the value args->inumber had on entry, the
+ * field is logged, the leaf's dabuf is freed and 0 is returned.  Otherwise
+ * the leaf buffer is released and the lookup result is returned.  In both
+ * cases every other buffer on the lookup path is released before the
+ * state is freed.
+ */
+STATIC int
+xfs_dir_node_replace(xfs_da_args_t *args)
+{
+       xfs_da_state_t *state;
+       xfs_da_state_blk_t *blk;
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_entry_t *entry;
+       xfs_dir_leaf_name_t *namest;
+       xfs_ino_t inum;
+       int retval, error, i;
+       xfs_dabuf_t *bp;
+
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_sb.sb_blocksize;
+       inum = args->inumber;   /* saved before the lookup; presumably the lookup overwrites args->inumber -- confirm */
+
+       /*
+        * Search to see if name exists,
+        * and get back a pointer to it.
+        */
+       error = xfs_da_node_lookup_int(state, &retval);
+       if (error) {
+               retval = error;
+       }
+
+       if (retval == EEXIST) {
+               blk = &state->path.blk[state->path.active - 1];        /* last block on the path: the leaf */
+               ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+               bp = blk->bp;
+               leaf = bp->data;
+               entry = &leaf->entries[blk->index];
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+                /* XXX - replace assert ? */
+               XFS_DIR_SF_PUT_DIRINO_ARCH(&inum, &namest->inumber, ARCH_CONVERT);
+               xfs_da_log_buf(args->trans, bp,
+                   XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+               xfs_da_buf_done(bp);    /* frees the dabuf only; the logged buffer is not released here */
+               blk->bp = NULL;
+               retval = 0;
+       } else {
+               i = state->path.active - 1;     /* not found: release the last path buffer */
+               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+       for (i = 0; i < state->path.active - 1; i++) { /* every block except the last, handled above */
+               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+
+       xfs_da_state_free(state);
+       return(retval);
+}
diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c
new file mode 100644 (file)
index 0000000..72acbb3
--- /dev/null
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS v2 directory implementation.
+ * Top-level and utility routines.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Initialize directory-related fields in the mount structure for
+ * version 2 directories.  The directory block size is the filesystem
+ * block size scaled by 2^sb_dirblklog; the data, leaf and free section
+ * start blocks, the da node entry count and the 37% "magic" threshold
+ * are all derived from it.
+ */
+void
+xfs_dir2_mount(
+       xfs_mount_t     *mp)            /* filesystem mount point */
+{
+       int             dirblklog;      /* log2 of directory block size */
+
+       mp->m_dirversion = 2;
+
+       dirblklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
+       ASSERT((1 << dirblklog) <= XFS_MAX_BLOCKSIZE);
+       mp->m_dirblksize = 1 << dirblklog;
+       mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;
+
+       /* First da block of each of the three directory sections. */
+       mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+       mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+       mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+
+       mp->m_da_node_ents =
+               (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
+               (uint)sizeof(xfs_da_node_entry_t);
+       mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries.
+ */
+int                            /* error */
+xfs_dir2_init(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       xfs_inode_t     *pdp)           /* incore parent directory inode */
+{
+       xfs_da_args_t   args;           /* operation arguments */
+       int             error;          /* error return value */
+
+       bzero((char *)&args, sizeof(args));
+       args.dp = dp;
+       args.trans = tp;
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       return xfs_dir2_sf_create(&args, pdp->i_ino);
+}
+
+/*
+ * Enter a name in a directory.
+ *
+ * The inode number is validated first.  The add routine is then chosen
+ * from the directory's current form: local (shortform) inode format,
+ * single block, leaf, or otherwise node.  An error from either of the
+ * form checks (xfs_dir2_isblock, xfs_dir2_isleaf) is returned directly.
+ */
+STATIC int                                     /* error */
+xfs_dir2_createname(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *dp,            /* incore directory inode */
+       char                    *name,          /* new entry name */
+       int                     namelen,        /* new entry name length */
+       xfs_ino_t               inum,           /* new entry inode number */
+       xfs_fsblock_t           *first,         /* bmap's firstblock */
+       xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
+       xfs_extlen_t            total)          /* bmap's total block count */
+{
+       xfs_da_args_t           args;           /* operation arguments */
+       int                     rval;           /* return value */
+       int                     v;              /* type-checking value */
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (rval = xfs_dir_ino_validate(tp->t_mountp, inum)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       }
+       XFS_STATS_INC(xs_dir_create);
+       /*
+        * Fill in the arg structure for this request.
+        * Only the fields listed here are set; the structure is not zeroed.
+        */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = inum;
+       args.dp = dp;
+       args.firstblock = first;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = tp;
+       args.justcheck = 0;
+       args.addname = args.oknoent = 1;
+       /*
+        * Decide on what work routines to call based on the inode size.
+        * Each form check stores its answer in v and returns an error code;
+        * the assignment inside the condition catches that error.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               rval = xfs_dir2_sf_addname(&args);
+       else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_block_addname(&args);
+       else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_leaf_addname(&args);
+       else
+               rval = xfs_dir2_node_addname(&args);
+       return rval;
+}
+
+/*
+ * Lookup a name in a directory, give back the inode number.
+ *
+ * Names of MAXNAMELEN or more bytes are rejected with EINVAL.  The lookup
+ * routine is chosen from the directory's current form: local (shortform)
+ * inode format, single block, leaf, or otherwise node.  An EEXIST result
+ * is reported as success (0), and *inum is written only on success.
+ */
+STATIC int                             /* error */
+xfs_dir2_lookup(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       char            *name,          /* lookup name */
+       int             namelen,        /* lookup name length */
+       xfs_ino_t       *inum)          /* out: inode number */
+{
+       xfs_da_args_t   args;           /* operation arguments */
+       int             rval;           /* return value */
+       int             v;              /* type-checking value */
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+               return XFS_ERROR(EINVAL);
+       }
+       XFS_STATS_INC(xs_dir_lookup);
+       /*
+        * Fill in the arg structure for this request.
+        * Only the fields listed here are set; the structure is not zeroed.
+        */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = 0;
+       args.dp = dp;
+       args.firstblock = NULL;
+       args.flist = NULL;
+       args.total = 0;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = tp;
+       args.justcheck = args.addname = 0;
+       args.oknoent = 1;
+       /*
+        * Decide on what work routines to call based on the inode size.
+        * Each form check stores its answer in v and returns an error code;
+        * the assignment inside the condition catches that error.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               rval = xfs_dir2_sf_lookup(&args);
+       else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_block_lookup(&args);
+       else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_leaf_lookup(&args);
+       else
+               rval = xfs_dir2_node_lookup(&args);
+       if (rval == EEXIST)     /* the work routines report "found" as EEXIST */
+               rval = 0;
+       if (rval == 0)
+               *inum = args.inumber;
+       return rval;
+}
+
+/*
+ * Remove an entry from a directory.
+ */
+STATIC int                             /* error */
+xfs_dir2_removename(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       char            *name,          /* name of entry to remove */
+       int             namelen,        /* name length of entry to remove */
+       xfs_ino_t       ino,            /* inode number of entry to remove */
+       xfs_fsblock_t   *first,         /* bmap's firstblock */
+       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       xfs_extlen_t    total)          /* bmap's total block count */
+{
+       xfs_da_args_t   args;           /* operation arguments */
+       int             rval;           /* return value */
+       int             v;              /* type-checking value */
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       XFS_STATS_INC(xs_dir_remove);
+       /*
+        * Fill in the arg structure for this request.
+        */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = ino;
+       args.dp = dp;
+       args.firstblock = first;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = tp;
+       args.justcheck = args.addname = args.oknoent = 0;
+       /*
+        * Decide on what work routines to call based on the inode size.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               rval = xfs_dir2_sf_removename(&args);
+       else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_block_removename(&args);
+       else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_leaf_removename(&args);
+       else
+               rval = xfs_dir2_node_removename(&args);
+       return rval;
+}
+
+/*
+ * Replace the inode number of a directory entry.
+ */
+STATIC int                             /* error */
+xfs_dir2_replace(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       char            *name,          /* name of entry to replace */
+       int             namelen,        /* name length of entry to replace */
+       xfs_ino_t       inum,           /* new inode number */
+       xfs_fsblock_t   *first,         /* bmap's firstblock */
+       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       xfs_extlen_t    total)          /* bmap's total block count */
+{
+       xfs_da_args_t   args;           /* operation arguments */
+       int             rval;           /* return value */
+       int             v;              /* type-checking value */
+
+       ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+       if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+               return XFS_ERROR(EINVAL);
+       }
+       if (rval = xfs_dir_ino_validate(tp->t_mountp, inum)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       }
+       /*
+        * Fill in the arg structure for this request.
+        */
+       args.name = name;
+       args.namelen = namelen;
+       args.hashval = xfs_da_hashname(name, namelen);
+       args.inumber = inum;
+       args.dp = dp;
+       args.firstblock = first;
+       args.flist = flist;
+       args.total = total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = tp;
+       args.justcheck = args.addname = args.oknoent = 0;
+       /*
+        * Decide on what work routines to call based on the inode size.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               rval = xfs_dir2_sf_replace(&args);
+       else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_block_replace(&args);
+       else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       } else if (v)
+               rval = xfs_dir2_leaf_replace(&args);
+       else
+               rval = xfs_dir2_node_replace(&args);
+       return rval;
+}
+
+/*
+ * Utility routines.
+ */
+
+/*
+ * Add a block to the directory.
+ * This routine is for data and free blocks, not leaf/node blocks
+ * which are handled by xfs_da_grow_inode.
+ */
+int                                    /* error */
+xfs_dir2_grow_inode(
+       xfs_da_args_t   *args,          /* operation arguments */
+       int             space,          /* v2 dir's space XFS_DIR2_xxx_SPACE */
+       xfs_dir2_db_t   *dbp)           /* out: block number added */
+{
+       xfs_fileoff_t   bno;            /* directory offset of new block */
+       int             count;          /* count of filesystem blocks */
+       xfs_inode_t     *dp;            /* incore directory inode */
+       int             error;          /* error return value */
+       int             got;            /* blocks actually mapped */
+       int             i;              /* temp mapping index */
+       xfs_bmbt_irec_t map;            /* single structure for bmap */
+       int             mapi;           /* mapping index */
+       xfs_bmbt_irec_t *mapp;          /* bmap mapping structure(s) */
+       xfs_mount_t     *mp;            /* filesystem mount point */
+       int             nmap;           /* number of bmap entries */
+       xfs_trans_t     *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_s("grow_inode", args, space);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       /*
+        * Set lowest possible block in the space requested.
+        */
+       bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
+       count = mp->m_dirblkfsbs;
+       /*
+        * Find the first hole for our block.
+        */
+       if (error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       nmap = 1;
+       ASSERT(args->firstblock != NULL);
+       /*
+        * Try mapping the new block contiguously (one extent).
+        */
+       if (error = xfs_bmapi(tp, dp, bno, count,
+                       XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
+                       args->firstblock, args->total, &map, &nmap,
+                       args->flist)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(nmap <= 1);
+       /*
+        * Got it in 1.
+        */
+       if (nmap == 1) {
+               mapp = &map;
+               mapi = 1;
+       }
+       /*
+        * Didn't work and this is a multiple-fsb directory block.
+        * Try again with contiguous flag turned on.
+        */
+       else if (nmap == 0 && count > 1) {
+#pragma mips_frequency_hint NEVER
+               xfs_fileoff_t   b;      /* current file offset */
+
+               /*
+                * Space for maximum number of mappings.
+                */
+               mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+               /*
+                * Iterate until we get to the end of our block.
+                */
+               for (b = bno, mapi = 0; b < bno + count; ) {
+                       int     c;      /* current fsb count */
+
+                       /*
+                        * Can't map more than MAX_NMAP at once.
+                        */
+                       nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+                       c = (int)(bno + count - b);
+                       if (error = xfs_bmapi(tp, dp, b, c,
+                                       XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
+                                       args->firstblock, args->total,
+                                       &mapp[mapi], &nmap, args->flist)) {
+                               kmem_free(mapp, sizeof(*mapp) * count);
+                               return error;
+                       }
+                       if (nmap < 1)
+                               break;
+                       /*
+                        * Add this bunch into our table, go to the next offset.
+                        */
+                       mapi += nmap;
+                       b = mapp[mapi - 1].br_startoff +
+                           mapp[mapi - 1].br_blockcount;
+               }
+       }
+       /*
+        * Didn't work.
+        */
+       else {
+#pragma mips_frequency_hint NEVER
+               mapi = 0;
+               mapp = NULL;
+       }
+       /*
+        * See how many fsb's we got.
+        */
+       for (i = 0, got = 0; i < mapi; i++)
+               got += mapp[i].br_blockcount;
+       /*
+        * Didn't get enough fsb's, or the first/last block's are wrong.
+        */
+       if (got != count || mapp[0].br_startoff != bno ||
+           mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+           bno + count) {
+#pragma mips_frequency_hint NEVER
+               if (mapp != &map)
+                       kmem_free(mapp, sizeof(*mapp) * count);
+               return XFS_ERROR(ENOSPC);
+       }
+       /*
+        * Done with the temporary mapping table.
+        */
+       if (mapp != &map)
+               kmem_free(mapp, sizeof(*mapp) * count);
+       *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno);
+       /*
+        * Update file's size if this is the data space and it grew.
+        */
+       if (space == XFS_DIR2_DATA_SPACE) {
+               xfs_fsize_t     size;           /* directory file (data) size */
+
+               size = XFS_FSB_TO_B(mp, bno + count);
+               if (size > dp->i_d.di_size) {
+                       dp->i_d.di_size = size;
+                       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+               }
+       }
+       return 0;
+}
+
+/*
+ * See if the directory is a single-block form directory.
+ */
+int                                    /* error */
+xfs_dir2_isblock(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       int             *vp)            /* out: 1 is block, 0 is not block */
+{
+       xfs_fileoff_t   last;           /* last file offset */
+       xfs_mount_t     *mp;            /* filesystem mount point */
+       int             rval;           /* return value */
+
+       mp = dp->i_mount;
+       if (rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       }
+       rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;
+       ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
+       *vp = rval;
+       return 0;
+}
+
+/*
+ * See if the directory is a single-leaf form directory.
+ */
+int                                    /* error */
+xfs_dir2_isleaf(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *dp,            /* incore directory inode */
+       int             *vp)            /* out: 1 is leaf, 0 is not leaf */
+{
+       xfs_fileoff_t   last;           /* last file offset */
+       xfs_mount_t     *mp;            /* filesystem mount point */
+       int             rval;           /* return value */
+
+       mp = dp->i_mount;
+       if (rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return rval;
+       }
+       *vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
+       return 0;
+}
+
+/*
+ * Remove the given block from the directory.
+ * This routine is used for data and free blocks, leaf/node are done
+ * by xfs_da_shrink_inode.
+ */
+int
+xfs_dir2_shrink_inode(
+       xfs_da_args_t   *args,          /* operation arguments */
+       xfs_dir2_db_t   db,             /* directory block number */
+       xfs_dabuf_t     *bp)            /* block's buffer */
+{
+       xfs_fileoff_t   bno;            /* directory file offset */
+       xfs_dablk_t     da;             /* directory file offset */
+       int             done;           /* bunmap is finished */
+       xfs_inode_t     *dp;            /* incore directory inode */
+       int             error;          /* error return value */
+       xfs_mount_t     *mp;            /* filesystem mount point */
+       xfs_trans_t     *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_db("shrink_inode", args, db, bp);
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       da = XFS_DIR2_DB_TO_DA(mp, db);
+       /*
+        * Unmap the fsblock(s).
+        */
+       if (error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
+                       XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
+                       &done)) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * ENOSPC actually can happen if we're in a removename with
+                * no space reservation, and the resulting block removal
+                * would cause a bmap btree split or conversion from extents
+                * to btree.  This can only happen for un-fragmented
+                * directory blocks, since you need to be punching out
+                * the middle of an extent.
+                * In this case we need to leave the block in the file,
+                * and not binval it.
+                * So the block has to be in a consistent empty state
+                * and appropriately logged.
+                * We don't free up the buffer, the caller can tell it 
+                * hasn't happened since it got an error back.
+                */
+               return error;
+       }
+       ASSERT(done);
+       /*
+        * Invalidate the buffer from the transaction.
+        */
+       xfs_da_binval(tp, bp);
+       /*
+        * If it's not a data block, we're done.
+        */
+       if (db >= XFS_DIR2_LEAF_FIRSTDB(mp)) 
+               return 0;
+       /*
+        * If the block isn't the last one in the directory, we're done.
+        */
+       if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+               return 0;
+       bno = da;
+       if (error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * This can't really happen unless there's kernel corruption.
+                */
+               return error;
+       }
+       if (db == mp->m_dirdatablk)
+               ASSERT(bno == 0);
+       else
+               ASSERT(bno > 0);
+       /*
+        * Set the size to the new last block.
+        */
+       dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
+       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+       return 0;
+}
diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c
new file mode 100644 (file)
index 0000000..9cf6773
--- /dev/null
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_block.c
+ * XFS V2 directory implementation, single-block form.
+ * See xfs_dir2_block.h for the format.
+ */
+
+#include <xfs.h>
+
+/*
+ * Add an entry to a block directory.
+ *
+ * Reads the directory's single block and looks for room for both the
+ * new data entry and its leaf entry.  The leaf slot comes from one of:
+ * the free space just below the leaf table (when there are no stale
+ * leaf entries), an existing stale leaf entry, or the stale entries
+ * after compacting them.
+ *
+ * If args->justcheck is set nothing is modified: the return is 0 if
+ * the entry would fit and ENOSPC if not.  If the entry does not fit
+ * and args->total is 0 the return is ENOSPC; otherwise the directory
+ * is converted with xfs_dir2_block_to_leaf and the add is redone by
+ * xfs_dir2_leaf_addname.  A block with the wrong magic number gives
+ * EFSCORRUPTED.
+ */
+int                                            /* error */
+xfs_dir2_block_addname(
+       xfs_da_args_t           *args)          /* directory op arguments */
+{
+       xfs_dir2_data_free_t    *bf;            /* bestfree table in block */
+       xfs_dir2_block_t        *block;         /* directory block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dabuf_t             *bp;            /* buffer for block */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       int                     compact;        /* need to compact leaf ents */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* block unused entry */
+       int                     error;          /* error return value */
+       xfs_dir2_data_unused_t  *enddup;        /* unused at end of data */
+       xfs_dahash_t            hash;           /* hash value of found entry */
+       int                     high;           /* high index for binary srch */
+       int                     highstale;      /* high stale index */
+       int                     lfloghigh;      /* last final leaf to log */
+       int                     lfloglow;       /* first final leaf to log */
+       int                     len;            /* length of the new entry */
+       int                     low;            /* low index for binary srch */
+       int                     lowstale;       /* low stale index */
+       int                     mid;            /* midpoint for binary srch */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log header */
+       int                     needscan;       /* need to rescan freespace */
+       xfs_dir2_data_off_t     *tagp;          /* pointer to tag value */
+       xfs_trans_t             *tp;            /* transaction structure */
+
+       xfs_dir2_trace_args("block_addname", args);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       /*
+        * Read the (one and only) directory block into dabuf bp.
+        */
+       if (error =
+           xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(bp != NULL);
+       block = bp->data;
+       /*
+        * Check the magic number, corrupted if wrong.
+        */
+       if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) {
+#pragma mips_frequency_hint NEVER
+               xfs_da_brelse(tp, bp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       len = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+       /*
+        * Set up pointers to parts of the block.
+        */
+       bf = block->hdr.bestfree;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * No stale entries?  Need space for entry and new leaf.
+        * On exit from this branch dup points at the free space chosen
+        * for the data entry (NULL if none will do) and enddup at the
+        * free space that will give up room for the new leaf entry.
+        */
+       if (INT_GET(btp->stale, ARCH_CONVERT) == 0) {
+               /*
+                * Tag just before the first leaf entry.
+                */
+               tagp = (xfs_dir2_data_off_t *)blp - 1;
+               /*
+                * Data object just before the first leaf entry.
+                */
+               enddup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+               /*
+                * If it's not free then can't do this add without cleaning up:
+                * the space before the first leaf entry needs to be free so it
+                * can be expanded to hold the pointer to the new entry.
+                */
+               if (INT_GET(enddup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+                       dup = enddup = NULL;
+               /*
+                * Check out the biggest freespace and see if it's the same one.
+                */
+               else {
+                       dup = (xfs_dir2_data_unused_t *)
+                             ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+                       if (dup == enddup) {
+                               /*
+                                * It is the biggest freespace, is it too small
+                                * to hold the new leaf too?
+                                */
+                               if (INT_GET(dup->length, ARCH_CONVERT) < len + (uint)sizeof(*blp)) {
+#pragma mips_frequency_hint NEVER
+                                       /*
+                                        * Yes, we use the second-largest
+                                        * entry instead if it works.
+                                        */
+                                       if (INT_GET(bf[1].length, ARCH_CONVERT) >= len)
+                                               dup = (xfs_dir2_data_unused_t *)
+                                                     ((char *)block +
+                                                      INT_GET(bf[1].offset, ARCH_CONVERT));
+                                       else
+                                               dup = NULL;
+                               }
+                       } else {
+                               /*
+                                * Not the same free entry,
+                                * just check its length.
+                                */
+                               if (INT_GET(dup->length, ARCH_CONVERT) < len) {
+#pragma mips_frequency_hint NEVER
+                                       dup = NULL;
+                               }
+                       }
+               }
+               compact = 0;
+       }
+       /*
+        * If there are stale entries we'll use one for the leaf.
+        * Is the biggest entry enough to avoid compaction?
+        */
+       else if (INT_GET(bf[0].length, ARCH_CONVERT) >= len) {
+               dup = (xfs_dir2_data_unused_t *)
+                     ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT));
+               compact = 0;
+       }
+       /*
+        * Will need to compact to make this work.
+        * Compaction keeps one stale leaf entry and turns the other
+        * (stale - 1) slots into data space, so that is the amount of
+        * extra room counted in the checks below.
+        */
+       else {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Tag just before the first leaf entry.
+                */
+               tagp = (xfs_dir2_data_off_t *)blp - 1;
+               /*
+                * Data object just before the first leaf entry.
+                */
+               dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+               /*
+                * If it's not free then the data will go where the
+                * leaf data starts now, if it works at all.
+                */
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (INT_GET(dup->length, ARCH_CONVERT) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) *
+                           (uint)sizeof(*blp) < len)
+                               dup = NULL;
+               } else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len)
+                       dup = NULL;
+               else
+                       dup = (xfs_dir2_data_unused_t *)blp;
+               compact = 1;
+       }
+       /*
+        * If this isn't a real add, we're done with the buffer.
+        */
+       if (args->justcheck)
+               xfs_da_brelse(tp, bp);
+       /*
+        * If we don't have space for the new entry & leaf ...
+        */
+       if (!dup) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Not trying to actually do anything, or don't have
+                * a space reservation: return no-space.
+                */
+               if (args->justcheck || args->total == 0)
+                       return XFS_ERROR(ENOSPC);
+               /*
+                * Convert to the next larger format.
+                * Then add the new entry in that format.
+                */
+               error = xfs_dir2_block_to_leaf(args, bp);
+               xfs_da_buf_done(bp);
+               if (error)
+                       return error;
+               return xfs_dir2_leaf_addname(args);
+       }
+       /*
+        * Just checking, and it would work, so say so.
+        */
+       if (args->justcheck)
+               return 0;
+       needlog = needscan = 0;
+       /*
+        * If need to compact the leaf entries, do it now.
+        * Leave the highest-numbered stale entry stale.
+        * XXX should be the one closest to mid but mid is not yet computed.
+        * The table is walked from the top down: live entries (and the
+        * one stale entry that is kept) are packed toward the high end,
+        * and the (stale - 1) slots left at the low end are handed to
+        * xfs_dir2_data_make_free as data free space.  blp is then moved
+        * up to the new start of the table, and count/stale are adjusted
+        * to match.
+        */
+       if (compact) {
+#pragma mips_frequency_hint NEVER
+               int     fromidx;                /* source leaf index */
+               int     toidx;                  /* target leaf index */
+
+               for (fromidx = toidx = INT_GET(btp->count, ARCH_CONVERT) - 1,
+                       highstale = lfloghigh = -1;
+                    fromidx >= 0;
+                    fromidx--) {
+                       if (INT_GET(blp[fromidx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+                               if (highstale == -1)
+                                       highstale = toidx;
+                               else {
+                                       if (lfloghigh == -1)
+                                               lfloghigh = toidx;
+                                       continue;
+                               }
+                       }
+                       if (fromidx < toidx)
+                               blp[toidx] = blp[fromidx];
+                       toidx--;
+               }
+               lfloglow = toidx + 1 - (INT_GET(btp->stale, ARCH_CONVERT) - 1);
+               lfloghigh -= INT_GET(btp->stale, ARCH_CONVERT) - 1;
+               INT_MOD(btp->count, ARCH_CONVERT, -(INT_GET(btp->stale, ARCH_CONVERT) - 1));
+               xfs_dir2_data_make_free(tp, bp,
+                       (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+                       (xfs_dir2_data_aoff_t)((INT_GET(btp->stale, ARCH_CONVERT) - 1) * sizeof(*blp)),
+                       &needlog, &needscan);
+               blp += INT_GET(btp->stale, ARCH_CONVERT) - 1;
+               INT_SET(btp->stale, ARCH_CONVERT, 1);
+               /*
+                * If we now need to rebuild the bestfree map, do so.
+                * This needs to happen before the next call to use_free.
+                */
+               if (needscan) {
+                       xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
+                               &needlog, NULL);
+                       needscan = 0;
+               }
+       }
+       /*
+        * Set leaf logging boundaries to impossible state.
+        * For the no-stale case they're set explicitly.
+        * Here low starts above high (count and -1), so the MIN/MAX
+        * updates in the stale-entry code below set the real range.
+        */
+       else if (INT_GET(btp->stale, ARCH_CONVERT)) {
+               lfloglow = INT_GET(btp->count, ARCH_CONVERT);
+               lfloghigh = -1;
+       }
+       /*
+        * Find the slot that's first lower than our hash value, -1 if none.
+        * The binary search can stop on any entry with an equal hash, so
+        * the loop after it backs mid up past every entry whose hash is
+        * greater than or equal to ours.
+        */
+       for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; low <= high; ) {
+               mid = (low + high) >> 1;
+               if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+                       break;
+               if (hash < args->hashval)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+       }
+       while (mid >= 0 && INT_GET(blp[mid].hashval, ARCH_CONVERT) >= args->hashval) {
+#pragma mips_frequency_hint NEVER
+               mid--;
+       }
+       /*
+        * No stale entries, will use enddup space to hold new leaf.
+        */
+       if (INT_GET(btp->stale, ARCH_CONVERT) == 0) {
+               /*
+                * Mark the space needed for the new leaf entry, now in use.
+                * The piece taken is the last sizeof(*blp) bytes of enddup.
+                */
+               xfs_dir2_data_use_free(tp, bp, enddup,
+                       (xfs_dir2_data_aoff_t)
+                       ((char *)enddup - (char *)block + INT_GET(enddup->length, ARCH_CONVERT) -
+                        sizeof(*blp)),
+                       (xfs_dir2_data_aoff_t)sizeof(*blp),
+                       &needlog, &needscan);
+               /*
+                * Update the tail (entry count).
+                */
+               INT_MOD(btp->count, ARCH_CONVERT, +1);
+               /*
+                * If we now need to rebuild the bestfree map, do so.
+                * This needs to happen before the next call to use_free.
+                */
+               if (needscan) {
+                       xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
+                               &needlog, NULL);
+                       needscan = 0;
+               }
+               /*
+                * Adjust pointer to the first leaf entry, we're about to move
+                * the table up one to open up space for the new leaf entry.
+                * Then adjust our index to match.
+                */
+               blp--;
+               mid++;
+               if (mid)
+                       ovbcopy(&blp[1], blp, mid * sizeof(*blp));
+               lfloglow = 0;
+               lfloghigh = mid;
+       }
+       /*
+        * Use a stale leaf for our new entry.
+        * lowstale is the nearest stale slot at or below mid (-1 if none).
+        * highstale is the nearest one above mid; that scan gives up as
+        * soon as it is no closer to mid than lowstale is.
+        */
+       else {
+               for (lowstale = mid;
+                    lowstale >= 0 &&
+                       INT_GET(blp[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+                    lowstale--)
+                       continue;
+               for (highstale = mid + 1;
+                    highstale < INT_GET(btp->count, ARCH_CONVERT) &&
+                       INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+                       (lowstale < 0 || mid - lowstale > highstale - mid);
+                    highstale++)
+                       continue;
+               /*
+                * Move entries toward the low-numbered stale entry.
+                * Entries lowstale+1 .. mid slide down one slot, leaving
+                * slot mid open for the new leaf entry.
+                */
+               if (lowstale >= 0 &&
+                   (highstale == INT_GET(btp->count, ARCH_CONVERT) ||
+                    mid - lowstale <= highstale - mid)) {
+                       if (mid - lowstale)
+                               ovbcopy(&blp[lowstale + 1], &blp[lowstale],
+                                       (mid - lowstale) * sizeof(*blp));
+                       lfloglow = MIN(lowstale, lfloglow);
+                       lfloghigh = MAX(mid, lfloghigh);
+               }
+               /*
+                * Move entries toward the high-numbered stale entry.
+                * mid is bumped first; entries mid .. highstale-1 slide up
+                * one slot, leaving slot mid open for the new leaf entry.
+                */
+               else {
+                       ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT));
+                       mid++;
+                       if (highstale - mid)
+                               ovbcopy(&blp[mid], &blp[mid + 1],
+                                       (highstale - mid) * sizeof(*blp));
+                       lfloglow = MIN(mid, lfloglow);
+                       lfloghigh = MAX(highstale, lfloghigh);
+               }
+               INT_MOD(btp->stale, ARCH_CONVERT, -1);
+       }
+       /*
+        * Point to the new data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       /*
+        * Fill in the leaf entry.
+        */
+       INT_SET(blp[mid].hashval, ARCH_CONVERT, args->hashval);
+       INT_SET(blp[mid].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+       xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
+       /*
+        * Mark space for the data entry used.
+        */
+       xfs_dir2_data_use_free(tp, bp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+               (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+       /*
+        * Create the new data entry.
+        * Its tag records the entry's own offset within the block.
+        */
+       INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+       dep->namelen = args->namelen;
+       bcopy(args->name, dep->name, args->namelen);
+       tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+       INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+       /*
+        * Clean up the bestfree array and log the header, tail, and entry.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+                       NULL);
+       if (needlog)
+               xfs_dir2_data_log_header(tp, bp);
+       xfs_dir2_block_log_tail(tp, bp);
+       xfs_dir2_data_log_entry(tp, bp, dep);
+       xfs_dir2_data_check(dp, bp);
+       xfs_da_buf_done(bp);
+       return 0;
+}
+
+/*
+ * Log the bytes of block leaf entries first..last (both indices inclusive).
+ */
+STATIC void
+xfs_dir2_block_log_leaf(
+       xfs_trans_t             *tp,            /* transaction structure */
+       xfs_dabuf_t             *bp,            /* block buffer */
+       int                     first,          /* index of first logged leaf */
+       int                     last)           /* index of last logged leaf */
+{
+       xfs_dir2_block_t        *block;         /* directory block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       mp = tp->t_mountp;      /* mount point is taken from the transaction */
+       block = bp->data;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
+               (uint)((char *)&blp[last + 1] - (char *)block - 1)); /* last byte of blp[last] */
+}
+
+/*
+ * Log the block tail: the byte range of *btp is passed to xfs_da_log_buf.
+ */
+STATIC void
+xfs_dir2_block_log_tail(
+       xfs_trans_t             *tp,            /* transaction structure */
+       xfs_dabuf_t             *bp)            /* block buffer */
+{
+       xfs_dir2_block_t        *block;         /* directory block structure */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       mp = tp->t_mountp;      /* mount point is taken from the transaction */
+       block = bp->data;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
+               (uint)((char *)(btp + 1) - (char *)block - 1)); /* last byte of *btp */
+}
+
+/*
+ * Look up an entry in the block; xfs_dir2_block_lookup_int does the real work.
+ * When the name is found, fill in args->inumber and return XFS_ERROR(EEXIST).
+ */
+int                                            /* error */
+xfs_dir2_block_lookup(
+       xfs_da_args_t           *args)          /* dir lookup arguments */
+{
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       xfs_dir2_trace_args("block_lookup", args);
+       /*
+        * Get the buffer, look up the entry.
+        * Any error (ENOENT if not found) is passed straight back to the caller.
+        */
+       if (error = xfs_dir2_block_lookup_int(args, &bp, &ent))
+               return error;
+       dp = args->dp;
+       mp = dp->i_mount;
+       block = bp->data;
+       xfs_dir2_data_check(dp, bp);
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Get the offset from the leaf entry, to point to the data.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+       /*
+        * Fill in inode number, release the block.
+        */
+       args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+       xfs_da_brelse(args->trans, bp);
+       return XFS_ERROR(EEXIST);       /* the "found" result of this routine */
+}
+
+/*
+ * Internal block lookup: 0 with *bpp and *entno set, else an error (ENOENT).
+ */
+STATIC int                                     /* error */
+xfs_dir2_block_lookup_int(
+       xfs_da_args_t           *args,          /* dir lookup arguments */
+       xfs_dabuf_t             **bpp,          /* returned block buffer */
+       int                     *entno)         /* returned entry number */
+{
+       xfs_dir2_dataptr_t      addr;           /* data entry address */
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     error;          /* error return value */
+       xfs_dahash_t            hash;           /* found hash value */
+       int                     high;           /* binary search high index */
+       int                     low;            /* binary search low index */
+       int                     mid;            /* binary search current idx */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       /*
+        * Read the buffer, return error if we can't get it.
+        */
+       if (error =
+           xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(bp != NULL);
+       block = bp->data;
+       xfs_dir2_data_check(dp, bp);
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Loop doing a binary search for our hash value.
+        * Find our entry, ENOENT (with the buffer released) if it's not there.
+        */
+       for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; ; ) {
+               ASSERT(low <= high);
+               mid = (low + high) >> 1;
+               if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval)
+                       break;
+               if (hash < args->hashval)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+               if (low > high) {
+                       ASSERT(args->oknoent);
+                       xfs_da_brelse(tp, bp);
+                       return XFS_ERROR(ENOENT);
+               }
+       }
+       /*
+        * Back up to the first one with the right hash value.
+        */
+       while (mid > 0 && INT_GET(blp[mid - 1].hashval, ARCH_CONVERT) == args->hashval) {
+#pragma mips_frequency_hint NEVER
+               mid--;
+       }
+       /*
+        * Now loop forward through all the entries with the
+        * right hash value looking for our name.
+        */
+       do {
+               if ((addr = INT_GET(blp[mid].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+                       continue;       /* stale entry: jumps to the while test, which does ++mid */
+               /*
+                * Get pointer to the entry from the leaf.
+                */
+               dep = (xfs_dir2_data_entry_t *)
+                       ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+               /*
+                * Compare, if it's right give back buffer & entry number.
+                */
+               if (dep->namelen == args->namelen &&
+                   dep->name[0] == args->name[0] &&
+                   bcmp(dep->name, args->name, args->namelen) == 0) {
+                       *bpp = bp;      /* buffer is handed back, not released */
+                       *entno = mid;
+                       return 0;
+               }
+       } while (++mid < INT_GET(btp->count, ARCH_CONVERT) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash);
+       /*
+        * No match, release the buffer and return ENOENT.
+        */
+       ASSERT(args->oknoent);
+       xfs_da_brelse(tp, bp);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove an entry from a block format directory.
+ * If that makes the block small enough to fit in shortform, transform it.
+ */
+int                                            /* error */
+xfs_dir2_block_removename(
+       xfs_da_args_t           *args)          /* directory operation args */
+{
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf pointer */
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* block leaf entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to fixup bestfree */
+       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
+       int                     size;           /* shortform size */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args("block_removename", args);
+       /*
+        * Look up the entry in the block.  Gets the buffer and entry index.
+        * It will always be there, the vnodeops level does a lookup first.
+        */
+       if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       block = bp->data;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Point to the data entry using the leaf entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+       /*
+        * Mark the data entry's space free.
+        */
+       needlog = needscan = 0;
+       xfs_dir2_data_make_free(tp, bp,
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
+               XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+       /*
+        * Fix up the block tail: one more stale leaf entry.
+        */
+       INT_MOD(btp->stale, ARCH_CONVERT, +1);
+       xfs_dir2_block_log_tail(tp, bp);
+       /*
+        * Remove the leaf entry by marking it stale.
+        */
+       INT_SET(blp[ent].address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+       xfs_dir2_block_log_leaf(tp, bp, ent, ent);
+       /*
+        * Fix up bestfree, log the header if necessary.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+                       NULL);
+       if (needlog)
+               xfs_dir2_data_log_header(tp, bp);
+       xfs_dir2_data_check(dp, bp);
+       /*
+        * If the shortform size exceeds XFS_IFORK_DSIZE(dp), stay in block form.
+        */
+       if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+           XFS_IFORK_DSIZE(dp)) {
+               xfs_da_buf_done(bp);
+               return 0;
+       }
+       /*
+        * If it works, do the conversion.
+        */
+       return xfs_dir2_block_to_sf(args, bp, size, &sfh);
+}
+
+/*
+ * Replace an entry in a V2 block directory.
+ * Change the entry's inode number to args->inumber and log the entry.
+ */
+int                                            /* error */
+xfs_dir2_block_replace(
+       xfs_da_args_t           *args)          /* directory operation args */
+{
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* leaf entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       xfs_dir2_trace_args("block_replace", args);
+       /*
+        * Lookup the entry in the directory.  Get buffer and entry index.
+        * This will always succeed since the caller has already done a lookup.
+        */
+       if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       dp = args->dp;
+       mp = dp->i_mount;
+       block = bp->data;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Point to the data entry we need to change.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT)));
+       ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber);
+       /*
+        * Change the inode number to the new value; only the data entry is logged.
+        */
+       INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+       xfs_dir2_data_log_entry(args->trans, bp, dep);
+       xfs_dir2_data_check(dp, bp);
+       xfs_da_buf_done(bp);
+       return 0;
+}
+
+/*
+ * Qsort comparison routine: orders block leaf entries by ascending hashval.
+ */
+static int                                     /* sort order: -1, 0 or 1 */
+xfs_dir2_block_sort(
+       const void                      *a,     /* first leaf entry */
+       const void                      *b)     /* second leaf entry */
+{
+       const xfs_dir2_leaf_entry_t     *la;    /* first leaf entry */
+       const xfs_dir2_leaf_entry_t     *lb;    /* second leaf entry */
+
+       la = a;
+       lb = b;
+       return INT_GET(la->hashval, ARCH_CONVERT) < INT_GET(lb->hashval, ARCH_CONVERT) ? -1 :
+               (INT_GET(la->hashval, ARCH_CONVERT) > INT_GET(lb->hashval, ARCH_CONVERT) ? 1 : 0);
+}
+
+/*
+ * Convert a V2 leaf directory to a V2 block directory if possible.
+ */
+int                                            /* error; 0 also if it can't fit */
+xfs_dir2_leaf_to_block(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *lbp,           /* leaf buffer */
+       xfs_dabuf_t             *dbp)           /* data buffer, may be NULL */
+{
+       xfs_dir2_data_off_t     *bestsp;        /* leaf bests table */
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
+       int                     error;          /* error return value */
+       int                     from;           /* leaf from index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* file system mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to scan for bestfree */
+       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
+       int                     size;           /* bytes used */
+       xfs_dir2_data_off_t     *tagp;          /* end of entry (tag) */
+       int                     to;             /* block/leaf to index */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = lbp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       /*
+        * If there are data blocks other than the first one, take this
+        * opportunity to remove trailing empty data blocks that may have
+        * been left behind during no-space-reservation operations.
+        * These will show up in the leaf bests table.
+        */
+       while (dp->i_d.di_size > mp->m_dirblksize) {
+               bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+               if (INT_GET(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1], ARCH_CONVERT) ==
+                   mp->m_dirblksize - (uint)sizeof(block->hdr)) {
+#pragma mips_frequency_hint NEVER
+                       if (error =
+                           xfs_dir2_leaf_trim_data(args, lbp,
+                                   (xfs_dir2_db_t)(INT_GET(ltp->bestcount, ARCH_CONVERT) - 1)))
+                               goto out;
+               } else {        /* last data block is not wholly free: give up */
+                       error = 0;
+                       goto out;
+               }
+       }
+       /*
+        * Read the data block if we don't already have it, give up if it fails.
+        */
+       if (dbp == NULL &&
+           (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp,
+                   XFS_DATA_FORK))) {
+#pragma mips_frequency_hint NEVER
+               goto out;
+       }
+       block = dbp->data;
+       ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+       /*
+        * Size of the "leaf" area in the block: tail plus the non-stale entries.
+        */
+       size = (uint)sizeof(block->tail) +
+              (uint)sizeof(*lep) * (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+       /*
+        * Look at the last data entry, found via the tag ending the block.
+        */
+       tagp = (xfs_dir2_data_off_t *)((char *)block + mp->m_dirblksize) - 1;
+       dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT));
+       /*
+        * If it's not free or is too short we can't do it.
+        */
+       if (INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG || INT_GET(dup->length, ARCH_CONVERT) < size) {
+               error = 0;
+               goto out;
+       }
+       /*
+        * Start converting it to block form.
+        */
+       INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+       needlog = 1;
+       needscan = 0;
+       /*
+        * Use up the space at the end of the block (blp/btp).
+        */
+       xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size,
+               &needlog, &needscan);
+       /*
+        * Initialize the block tail.
+        */
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       INT_SET(btp->count, ARCH_CONVERT, INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+       INT_SET(btp->stale, ARCH_CONVERT, 0);
+       xfs_dir2_block_log_tail(tp, dbp);
+       /*
+        * Initialize the block leaf area.  We compact out stale entries.
+        */
+       lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+               if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               lep[to++] = leaf->ents[from];
+       }
+       ASSERT(to == INT_GET(btp->count, ARCH_CONVERT));
+       xfs_dir2_block_log_leaf(tp, dbp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+       /*
+        * Scan the bestfree if we need it and log the data block header.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+                       NULL);
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       /*
+        * Pitch the old leaf block.
+        */
+       error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
+       lbp = NULL;     /* so "out" below skips xfs_da_buf_done(lbp) */
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               goto out;
+       }
+       /*
+        * Now see if the resulting block can be shrunken to shortform.
+        */
+       if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
+           XFS_IFORK_DSIZE(dp)) {
+               error = 0;
+               goto out;
+       }
+       return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
+out:
+       if (lbp)
+               xfs_da_buf_done(lbp);
+       if (dbp)
+               xfs_da_buf_done(dbp);
+       return error;
+}
+
+/*
+ * Convert the shortform directory to block form, keeping each entry's offset.
+ */
+int                                            /* error */
+xfs_dir2_sf_to_block(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dir2_db_t           blkno;          /* dir-relative block # (0) */
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
+       char                    buf[XFS_DIR2_SF_MAX_SIZE];      /* sf buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     dummy;          /* trash */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
+       int                     endoffset;      /* end of data objects */
+       int                     error;          /* error return value */
+       int                     i;              /* index */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to scan block freespc */
+       int                     newoffset;      /* offset from current entry */
+       int                     offset;         /* target block offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* sf entry pointer */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_data_off_t     *tagp;          /* end of data entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args("sf_to_block", args);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Bomb out if the shortform directory is way too short.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(mp));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+       /*
+        * Copy the directory into the stack buffer.
+        * Then pitch the incore inode data so we can make extents.
+        */
+       bcopy(sfp, buf, dp->i_df.if_bytes);
+       xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
+       dp->i_d.di_size = 0;
+       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+       /*
+        * Reset pointer - old sfp is gone.
+        */
+       sfp = (xfs_dir2_sf_t *)buf;
+       /*
+        * Add block 0 to the inode.
+        */
+       error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       /*
+        * Initialize the data block.
+        */
+       error = xfs_dir2_data_init(args, blkno, &bp);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       block = bp->data;
+       INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC);
+       /*
+        * Compute size of block "tail" area: tail plus count + 2 leaf entries.
+        */
+       i = (uint)sizeof(*btp) +
+           (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+       /*
+        * The whole thing is initialized to free by the init routine.
+        * Say we're using the leaf and tail area.
+        */
+       dup = (xfs_dir2_data_unused_t *)block->u;
+       needlog = needscan = 0;
+       xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
+               &needscan);
+       ASSERT(needscan == 0);
+       /*
+        * Fill in the tail.
+        */
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       INT_SET(btp->count, ARCH_CONVERT, INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2);   /* ., .. */
+       INT_ZERO(btp->stale, ARCH_CONVERT);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       endoffset = (uint)((char *)blp - (char *)block);
+       /*
+        * Remove the freespace, we'll manage it.
+        */
+       xfs_dir2_data_use_free(tp, bp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+               INT_GET(dup->length, ARCH_CONVERT), &needlog, &needscan);
+       /*
+        * Create entry for .
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
+       INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino);
+       dep->namelen = 1;
+       dep->name[0] = '.';
+       tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+       INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+       xfs_dir2_data_log_entry(tp, bp, dep);
+       INT_SET(blp[0].hashval, ARCH_CONVERT, xfs_dir_hash_dot);
+       INT_SET(blp[0].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+       /*
+        * Create entry for ..
+        */
+       dep = (xfs_dir2_data_entry_t *)
+               ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
+       INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT));
+       dep->namelen = 2;
+       dep->name[0] = dep->name[1] = '.';
+       tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+       INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+       xfs_dir2_data_log_entry(tp, bp, dep);
+       INT_SET(blp[1].hashval, ARCH_CONVERT, xfs_dir_hash_dotdot);
+       INT_SET(blp[1].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block));
+       offset = XFS_DIR2_DATA_FIRST_OFFSET;
+       /*
+        * Start at the first sf entry, if any; i counts the sf entries copied.
+        */
+       if ((i = 0) == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+               sfep = NULL;
+       else
+               sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+       /*
+        * Need to preserve the existing offset values in the sf directory.
+        * Insert holes (unused entries) where necessary.
+        */
+       while (offset < endoffset) {
+               /*
+                * sfep is null when we reach the end of the list.
+                */
+               if (sfep == NULL)
+                       newoffset = endoffset;
+               else
+                       newoffset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT);
+               /*
+                * There should be a hole here, make one.
+                */
+               if (offset < newoffset) {
+                       dup = (xfs_dir2_data_unused_t *)
+                             ((char *)block + offset);
+                       INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+                       INT_SET(dup->length, ARCH_CONVERT, newoffset - offset);
+                       INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT,
+                               (xfs_dir2_data_off_t)
+                               ((char *)dup - (char *)block));
+                       xfs_dir2_data_log_unused(tp, bp, dup);
+                       (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
+                               dup, &dummy);
+                       offset += INT_GET(dup->length, ARCH_CONVERT);
+                       continue;
+               }
+               /*
+                * Copy a real entry.
+                */
+               dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
+               INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+                               XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT));
+               dep->namelen = sfep->namelen;
+               bcopy(sfep->name, dep->name, dep->namelen);
+               tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+               INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block));
+               xfs_dir2_data_log_entry(tp, bp, dep);
+               INT_SET(blp[2 + i].hashval, ARCH_CONVERT, xfs_da_hashname((char *)sfep->name, sfep->namelen));
+               INT_SET(blp[2 + i].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp,
+                                                (char *)dep - (char *)block));
+               offset = (int)((char *)(tagp + 1) - (char *)block);     /* just past the tag */
+               if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT))
+                       sfep = NULL;
+               else
+                       sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+       }
+       /*
+        * Sort the leaf entries by hash value.
+        */
+       qsort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort);
+       /*
+        * Log the leaf entry area and tail.
+        * Already logged the header in data_init, ignore needlog.
+        */
+       ASSERT(needscan == 0);
+       xfs_dir2_block_log_leaf(tp, bp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1);
+       xfs_dir2_block_log_tail(tp, bp);
+       xfs_dir2_data_check(dp, bp);
+       xfs_da_buf_done(bp);
+       return 0;
+}
diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c
new file mode 100644 (file)
index 0000000..d921a21
--- /dev/null
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_data.c
+ * Core data block handling routines for XFS V2 directories.
+ * See xfs_dir2_data.h for data structures.
+ */
+#include <xfs.h>
+
+#ifdef DEBUG
+/*
+ * Check the consistency of the data block.
+ * The input can also be a block-format directory.
+ * Pop an assert if we find anything bad.
+ *
+ * This is DEBUG-only code: every check is an ASSERT and this routine
+ * itself stores nothing into the block.  What gets checked:
+ *   - the magic number is the data or the block magic;
+ *   - bestfree lengths are in descending order, and a slot with zero
+ *     length also has zero offset;
+ *   - two unused entries are never adjacent, the tag of each unused
+ *     entry holds that entry's own offset, and an unused entry that is
+ *     not in the bestfree table is no longer than the last slot;
+ *   - each real entry has a non-zero name length, an inode number that
+ *     xfs_dir_ino_validate accepts, and a tag holding its own offset;
+ *   - for block format, each real entry has a leaf entry with the same
+ *     address and hash, leaf hash values never decrease, and the tail's
+ *     count and stale fields agree with what the walk found.
+ */
+void
+xfs_dir2_data_check(
+       xfs_inode_t             *dp,            /* incore inode pointer */
+       xfs_dabuf_t             *bp)            /* data block's buffer */
+{
+       xfs_dir2_dataptr_t      addr;           /* addr for leaf lookup */
+       xfs_dir2_data_free_t    *bf;            /* bestfree table */
+       xfs_dir2_block_tail_t   *btp;           /* block tail, block format only */
+       int                     count;          /* count of real entries found */
+       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry */
+       char                    *endp;          /* end of useful data */
+       int                     freeseen;       /* mask of bestfrees seen, bit i = slot i */
+       xfs_dahash_t            hash;           /* hash of current name */
+       int                     i;              /* leaf index, also bestfree index */
+       int                     lastfree;       /* last entry was unused */
+       xfs_dir2_leaf_entry_t   *lep;           /* block leaf entries, block format only */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       char                    *p;             /* current data position */
+       int                     stale;          /* count of stale leaves */
+
+       mp = dp->i_mount;
+       d = bp->data;
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       bf = d->hdr.bestfree;
+       p = (char *)d->u;                       /* start of the entry area */
+       if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+               lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+               endp = (char *)lep;             /* entries stop where the leaf array starts */
+       } else
+               endp = (char *)d + mp->m_dirblksize;    /* entries run to the end of the block */
+       count = lastfree = freeseen = 0;
+       /*
+        * Account for zero bestfree entries.
+        * A slot with zero length must have zero offset too.  Mark it
+        * as seen here: xfs_dir2_data_freefind never returns a slot
+        * whose offset is zero, so the walk below cannot mark it.
+        */
+       if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+               ASSERT(INT_GET(bf[0].offset, ARCH_CONVERT) == 0);
+               freeseen |= 1 << 0;
+       }
+       if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+               ASSERT(INT_GET(bf[1].offset, ARCH_CONVERT) == 0);
+               freeseen |= 1 << 1;
+       }
+       if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+               ASSERT(INT_GET(bf[2].offset, ARCH_CONVERT) == 0);
+               freeseen |= 1 << 2;
+       }
+       ASSERT(INT_GET(bf[0].length, ARCH_CONVERT) >= INT_GET(bf[1].length, ARCH_CONVERT));
+       ASSERT(INT_GET(bf[1].length, ARCH_CONVERT) >= INT_GET(bf[2].length, ARCH_CONVERT));
+       /*
+        * Loop over the data/unused entries.
+        * Each pass advances p by the length of the entry found there.
+        */
+       while (p < endp) {
+               dup = (xfs_dir2_data_unused_t *)p;
+               /*
+                * If it's unused, look for the space in the bestfree table.
+                * If we find it, account for that, else make sure it
+                * doesn't need to be there.
+                * Also: the previous entry must not have been unused
+                * (lastfree), the tag at the end of this unused entry
+                * must hold the entry's own offset, and a bestfree
+                * slot may be matched by one unused entry only.
+                */
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       ASSERT(lastfree == 0);
+                       ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) ==
+                              (char *)dup - (char *)d);
+                       dfp = xfs_dir2_data_freefind(d, dup);
+                       if (dfp) {
+                               i = (int)(dfp - bf);    /* index of the matching slot */
+                               ASSERT((freeseen & (1 << i)) == 0);
+                               freeseen |= 1 << i;
+                       } else
+                               ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(bf[2].length, ARCH_CONVERT));
+                       p += INT_GET(dup->length, ARCH_CONVERT);
+                       lastfree = 1;
+                       continue;
+               }
+               /*
+                * It's a real entry.  Validate the fields.
+                * If this is a block directory then make sure it's
+                * in the leaf section of the block.
+                * The linear search is crude but this is DEBUG code.
+                * The leaf entry has to match both the entry's address
+                * and the hash of its name.
+                */
+               dep = (xfs_dir2_data_entry_t *)p;
+               ASSERT(dep->namelen != 0);
+               ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0);
+               ASSERT(INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) ==
+                      (char *)dep - (char *)d);
+               count++;
+               lastfree = 0;
+               if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+                       addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk,
+                               (xfs_dir2_data_aoff_t)
+                               ((char *)dep - (char *)d));
+                       hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+                       for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+                               if (INT_GET(lep[i].address, ARCH_CONVERT) == addr &&
+                                   INT_GET(lep[i].hashval, ARCH_CONVERT) == hash)
+                                       break;
+                       }
+                       ASSERT(i < INT_GET(btp->count, ARCH_CONVERT));  /* loop ended early, so a match exists */
+               }
+               p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+       }
+       /*
+        * Need to have seen all the entries and all the bestfree slots.
+        * freeseen carries one bit per bestfree slot, so 7 means all
+        * three slots were accounted for.
+        * For block format, also walk the leaf array: hash values must
+        * never decrease, the number of real entries must equal the
+        * tail's count minus its stale field, and the number of leaf
+        * entries with a null address must equal the stale field.
+        */
+       ASSERT(freeseen == 7);
+       if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+                       if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                               stale++;
+                       if (i > 0)
+                               ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT));
+               }
+               ASSERT(count == INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT));
+               ASSERT(stale == INT_GET(btp->stale, ARCH_CONVERT));
+       }
+}
+#endif
+
+/*
+ * Given a data block and an unused entry from that block,
+ * return the bestfree entry if any that corresponds to it.
+ */
+xfs_dir2_data_free_t *
+xfs_dir2_data_freefind(
+       xfs_dir2_data_t         *d,             /* data block */
+       xfs_dir2_data_unused_t  *dup)           /* data unused entry */
+{
+       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
+       xfs_dir2_data_aoff_t    off;            /* offset value needed */
+#if defined(DEBUG) && defined(__KERNEL__)
+       int                     matched;        /* matched the value */
+       int                     seenzero;       /* saw a 0 bestfree entry */
+#endif
+
+       off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
+#if defined(DEBUG) && defined(__KERNEL__)
+       /*
+        * Validate some consistency in the bestfree table.
+        * Check order, non-overlapping entries, and if we find the
+        * one we're looking for it has to be exact.
+        */
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
+            dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+            dfp++) {
+               if (INT_GET(dfp->offset, ARCH_CONVERT) == 0) {
+                       ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == 0);
+                       seenzero = 1;
+                       continue;
+               }
+               ASSERT(seenzero == 0);
+               if (INT_GET(dfp->offset, ARCH_CONVERT) == off) {
+                       matched = 1;
+                       ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(dup->length, ARCH_CONVERT));
+               } else if (off < INT_GET(dfp->offset, ARCH_CONVERT))
+                       ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(dfp->offset, ARCH_CONVERT));
+               else
+                       ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) + INT_GET(dfp->length, ARCH_CONVERT) <= off);
+               ASSERT(matched || INT_GET(dfp->length, ARCH_CONVERT) >= INT_GET(dup->length, ARCH_CONVERT));
+               if (dfp > &d->hdr.bestfree[0])
+                       ASSERT(INT_GET(dfp[-1].length, ARCH_CONVERT) >= INT_GET(dfp[0].length, ARCH_CONVERT));
+       }
+#endif
+       /*
+        * If this is smaller than the smallest bestfree entry,
+        * it can't be there since they're sorted.
+        */
+       if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT))
+               return NULL;
+       /*
+        * Look at the three bestfree entries for our guy.
+        */
+       for (dfp = &d->hdr.bestfree[0];
+            dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+            dfp++) {
+               if (INT_GET(dfp->offset, ARCH_CONVERT) == 0)
+                       return NULL;
+               if (INT_GET(dfp->offset, ARCH_CONVERT) == off)
+                       return dfp;
+       }
+       /*
+        * Didn't find it.  This only happens if there are duplicate lengths.
+        */
+       return NULL;
+}
+
+/*
+ * Insert an unused-space entry into the bestfree table.
+ */
+xfs_dir2_data_free_t *                         /* entry inserted */
+xfs_dir2_data_freeinsert(
+       xfs_dir2_data_t         *d,             /* data block pointer */
+       xfs_dir2_data_unused_t  *dup,           /* unused space */
+       int                     *loghead)       /* log the data header (out) */
+{
+       xfs_dir2_data_free_t    *dfp;           /* bestfree table pointer */
+       xfs_dir2_data_free_t    new;            /* new bestfree entry */
+
+#ifdef __KERNEL__
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+       dfp = d->hdr.bestfree;
+       INT_COPY(new.length, dup->length, ARCH_CONVERT); 
+       INT_SET(new.offset, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d));
+       /*
+        * Insert at position 0, 1, or 2; or not at all.
+        */
+       if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[0].length, ARCH_CONVERT)) {
+               dfp[2] = dfp[1];
+               dfp[1] = dfp[0];
+               dfp[0] = new;
+               *loghead = 1;
+               return &dfp[0];
+       }
+       if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[1].length, ARCH_CONVERT)) {
+               dfp[2] = dfp[1];
+               dfp[1] = new;
+               *loghead = 1;
+               return &dfp[1];
+       }
+       if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[2].length, ARCH_CONVERT)) {
+               dfp[2] = new;
+               *loghead = 1;
+               return &dfp[2];
+       }
+       return NULL;
+}
+
+/*
+ * Remove a bestfree entry from the table.
+ */
+void
+xfs_dir2_data_freeremove(
+       xfs_dir2_data_t         *d,             /* data block pointer */
+       xfs_dir2_data_free_t    *dfp,           /* bestfree entry pointer */
+       int                     *loghead)       /* out: log data header */
+{
+#ifdef __KERNEL__
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+       /*
+        * It's the first entry, slide the next 2 up.
+        */
+       if (dfp == &d->hdr.bestfree[0]) {
+               d->hdr.bestfree[0] = d->hdr.bestfree[1];
+               d->hdr.bestfree[1] = d->hdr.bestfree[2];
+       }
+       /*
+        * It's the second entry, slide the 3rd entry up.
+        */
+       else if (dfp == &d->hdr.bestfree[1])
+               d->hdr.bestfree[1] = d->hdr.bestfree[2];
+       /*
+        * Must be the last entry.
+        */
+       else
+               ASSERT(dfp == &d->hdr.bestfree[2]);
+       /*
+        * Clear the 3rd entry, must be zero now.
+        */
+        INT_ZERO(d->hdr.bestfree[2].length, ARCH_CONVERT);
+       INT_ZERO(d->hdr.bestfree[2].offset, ARCH_CONVERT);
+       *loghead = 1;
+}
+
+/*
+ * Given a data block, reconstruct its bestfree map.
+ */
+void
+xfs_dir2_data_freescan(
+       xfs_mount_t             *mp,            /* filesystem mount point */
+       xfs_dir2_data_t         *d,             /* data block pointer */
+       int                     *loghead,       /* out: log data header */
+       char                    *aendp)         /* in: caller's endp */
+{
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* active data entry */
+       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
+       char                    *endp;          /* end of block's data */
+       char                    *p;             /* current entry pointer */
+
+#ifdef __KERNEL__
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+#endif
+       /*
+        * Start by clearing the table.
+        */
+       bzero(d->hdr.bestfree, sizeof(d->hdr.bestfree));
+       *loghead = 1;
+       /*
+        * Set up pointers.
+        */
+       p = (char *)d->u;
+       if (aendp)
+               endp = aendp;
+       else if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+               endp = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       } else
+               endp = (char *)d + mp->m_dirblksize;
+       /*
+        * Loop over the block's entries.
+        */
+       while (p < endp) {
+               dup = (xfs_dir2_data_unused_t *)p;
+               /*
+                * If it's a free entry, insert it.
+                */
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       ASSERT((char *)dup - (char *)d ==
+                              INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT));
+                       xfs_dir2_data_freeinsert(d, dup, loghead);
+                       p += INT_GET(dup->length, ARCH_CONVERT);
+               }
+               /*
+                * For active entries, check their tags and skip them.
+                */
+               else {
+                       dep = (xfs_dir2_data_entry_t *)p;
+                       ASSERT((char *)dep - (char *)d ==
+                              INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT));
+                       p += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               }
+       }
+}
+
+/*
+ * Initialize a data block at the given block number in the directory.
+ * Give back the buffer for the created block.
+ */
+int                                            /* error */
+xfs_dir2_data_init(
+       xfs_da_args_t           *args,          /* directory operation args */
+       xfs_dir2_db_t           blkno,          /* logical dir block number */
+       xfs_dabuf_t             **bpp)          /* output block buffer */
+{
+       xfs_dabuf_t             *bp;            /* block buffer */
+       xfs_dir2_data_t         *d;             /* pointer to block */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
+       int                     error;          /* error return value */
+       int                     i;              /* bestfree index */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+        int                     t;              /* temp */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Get the buffer set up for the block.
+        */
+       error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp,
+               XFS_DATA_FORK);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(bp != NULL);
+       /*
+        * Initialize the header.
+        */
+       d = bp->data;
+       INT_SET(d->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+       INT_SET(d->hdr.bestfree[0].offset, ARCH_CONVERT, (xfs_dir2_data_off_t)sizeof(d->hdr));
+       for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
+                INT_ZERO(d->hdr.bestfree[i].length, ARCH_CONVERT);
+               INT_ZERO(d->hdr.bestfree[i].offset, ARCH_CONVERT);
+        }
+       /*
+        * Set up an unused entry for the block's body.
+        */
+       dup = &d->u[0].unused;
+       INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+        
+        t=mp->m_dirblksize - (uint)sizeof(d->hdr);
+        INT_SET(d->hdr.bestfree[0].length, ARCH_CONVERT, t);
+       INT_SET(dup->length, ARCH_CONVERT, t);
+       INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT,
+               (xfs_dir2_data_off_t)((char *)dup - (char *)d));
+       /*
+        * Log it and return it.
+        */
+       xfs_dir2_data_log_header(tp, bp);
+       xfs_dir2_data_log_unused(tp, bp, dup);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Log one active entry of a data block.
+ *
+ * The range passed to xfs_da_log_buf runs from the first byte of the
+ * entry through the last byte of its tag.
+ */
+void
+xfs_dir2_data_log_entry(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* block buffer */
+       xfs_dir2_data_entry_t   *dep)           /* data entry pointer */
+{
+       xfs_dir2_data_t         *d;             /* data block pointer */
+       uint                    first;          /* offset of first byte */
+       uint                    last;           /* offset of last byte */
+
+       d = bp->data;
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       first = (uint)((char *)dep - (char *)d);
+       last = (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) -
+                     (char *)d - 1);
+       xfs_da_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Log the header of a data block.
+ *
+ * The range passed to xfs_da_log_buf covers the whole of d->hdr.
+ */
+void
+xfs_dir2_data_log_header(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp)            /* block buffer */
+{
+       xfs_dir2_data_t         *d;             /* data block pointer */
+       uint                    first;          /* offset of first byte */
+       uint                    last;           /* offset of last byte */
+
+       d = bp->data;
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       first = (uint)((char *)&d->hdr - (char *)d);
+       last = (uint)(sizeof(d->hdr) - 1);
+       xfs_da_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Log an unused entry of a data block.
+ *
+ * Two ranges are passed to xfs_da_log_buf: the start of the entry
+ * through the end of its length field, and the tag at the end of the
+ * entry.  The bytes in between are not logged.
+ */
+void
+xfs_dir2_data_log_unused(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* block buffer */
+       xfs_dir2_data_unused_t  *dup)           /* data unused pointer */
+{
+       xfs_dir2_data_t         *d;             /* data block pointer */
+       uint                    first;          /* first byte of a range */
+       uint                    last;           /* last byte of a range */
+       char                    *tagp;          /* tag at end of the entry */
+
+       d = bp->data;
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       /*
+        * Front of the entry, up to and including the length field.
+        */
+       first = (uint)((char *)dup - (char *)d);
+       last = (uint)((char *)&dup->length + sizeof(dup->length) -
+                     1 - (char *)d);
+       xfs_da_log_buf(tp, bp, first, last);
+       /*
+        * The tag at the back of the entry.
+        */
+       tagp = (char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT);
+       first = (uint)(tagp - (char *)d);
+       last = (uint)(tagp - (char *)d + sizeof(xfs_dir2_data_off_t) - 1);
+       xfs_da_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Make a byte range in the data block unused.
+ * Its current contents are unimportant.
+ *
+ * The range [offset, offset + len) is turned into an unused entry.
+ * If the entry just before and/or just after the range is already
+ * unused, the range is merged with it so that a single unused entry
+ * results.  The resulting unused entry is logged here.
+ *
+ * The bestfree table is updated in place when that can be done from
+ * what is known locally; those updates go through
+ * xfs_dir2_data_freeremove/xfs_dir2_data_freeinsert, which set
+ * *needlogp to 1 when they change the table.  When it cannot be done
+ * locally the table is left as it was and *needscanp is set non-zero.
+ * *needscanp must be 0 on entry.  *needlogp is never cleared here.
+ */
+void
+xfs_dir2_data_make_free(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* block buffer */
+       xfs_dir2_data_aoff_t    offset,         /* starting byte offset */
+       xfs_dir2_data_aoff_t    len,            /* length in bytes */
+       int                     *needlogp,      /* out: log header */
+       int                     *needscanp)     /* out: regen bestfree */
+{
+       xfs_dir2_data_t         *d;             /* data block pointer */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
+       char                    *endptr;        /* end of data area */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needscan;       /* need to regen bestfree */
+       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
+       xfs_dir2_data_unused_t  *postdup;       /* unused entry after us */
+       xfs_dir2_data_unused_t  *prevdup;       /* unused entry before us */
+
+       mp = tp->t_mountp;
+       d = bp->data;
+       /*
+        * Figure out where the end of the data area is.
+        * A data block uses the whole directory block; a block-format
+        * block stops where its leaf array begins.
+        */
+       if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC)
+               endptr = (char *)d + mp->m_dirblksize;
+       else {
+               xfs_dir2_block_tail_t   *btp;   /* block tail */
+
+               ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d);
+               endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       }
+       /*
+        * If this isn't the start of the block, then back up to
+        * the previous entry and see if it's free.
+        * The tag stored in the bytes just before our range holds the
+        * starting offset of the entry that ends there.
+        */
+       if (offset > sizeof(d->hdr)) {
+               xfs_dir2_data_off_t     *tagp;  /* tag just before us */
+
+               tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1;
+               prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT));
+               if (INT_GET(prevdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+                       prevdup = NULL;
+       } else
+               prevdup = NULL;
+       /*
+        * If this isn't the end of the block, see if the entry after
+        * us is free.
+        */
+       if ((char *)d + offset + len < endptr) {
+               postdup =
+                       (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+               if (INT_GET(postdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG)
+                       postdup = NULL;
+       } else
+               postdup = NULL;
+       ASSERT(*needscanp == 0);
+       needscan = 0;
+       /*
+        * Previous and following entries are both free,
+        * merge everything into a single free entry.
+        * The merged entry starts where prevdup starts.
+        */
+       if (prevdup && postdup) {
+               xfs_dir2_data_free_t    *dfp2;  /* another bestfree pointer */
+
+               /*
+                * See if prevdup and/or postdup are in bestfree table.
+                * This has to be done before prevdup's length changes.
+                */
+               dfp = xfs_dir2_data_freefind(d, prevdup);
+               dfp2 = xfs_dir2_data_freefind(d, postdup);
+               /*
+                * We need a rescan unless there are exactly 2 free entries
+                * namely our two.  Then we know what's happening, otherwise
+                * since the third bestfree is there, there might be more
+                * entries.
+                */
+               needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0;
+               /*
+                * Fix up the new big freespace.
+                * prevdup grows by our range plus postdup, and its tag
+                * is rewritten at the new end of the entry.
+                */
+               INT_MOD(prevdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, prevdup);
+               if (!needscan) {
+                       /*
+                        * Has to be the case that entries 0 and 1 are
+                        * dfp and dfp2 (don't know which is which), and
+                        * entry 2 is empty.
+                        * Remove entry 1 first then entry 0.
+                        * Removing a slot slides the later slots up, so
+                        * the pointers are swapped if needed to make
+                        * dfp2 name slot 1 and dfp name slot 0.
+                        */
+                       ASSERT(dfp && dfp2);
+                       if (dfp == &d->hdr.bestfree[1]) {
+                               dfp = &d->hdr.bestfree[0];
+                               ASSERT(dfp2 == dfp);
+                               dfp2 = &d->hdr.bestfree[1];
+                       }
+                       xfs_dir2_data_freeremove(d, dfp2, needlogp);
+                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       /*
+                        * Now insert the new entry.
+                        * The table is empty at this point, so the
+                        * merged entry has to land in slot 0.
+                        */
+                       dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+                       ASSERT(dfp == &d->hdr.bestfree[0]);
+                       ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(prevdup->length, ARCH_CONVERT));
+                       ASSERT(INT_GET(dfp[1].length, ARCH_CONVERT) == 0);
+                       ASSERT(INT_GET(dfp[2].length, ARCH_CONVERT) == 0);
+               }
+       }
+       /*
+        * The entry before us is free, merge with it.
+        * The bestfree lookup is done first, while prevdup still has
+        * its old length.
+        */
+       else if (prevdup) {
+               dfp = xfs_dir2_data_freefind(d, prevdup);
+               INT_MOD(prevdup->length, ARCH_CONVERT, len);
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)prevdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, prevdup);
+               /*
+                * If the previous entry was in the table, the new entry
+                * is longer, so it will be in the table too.  Remove
+                * the old one and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+               }
+               /*
+                * Otherwise we need a scan if the new entry is big enough.
+                * Big enough means longer than the last bestfree slot.
+                */
+               else
+                       needscan = INT_GET(prevdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+       }
+       /*
+        * The following entry is free, merge with it.
+        * The merged entry starts at our offset, so a new unused
+        * header is written there; postdup is looked up first.
+        */
+       else if (postdup) {
+               dfp = xfs_dir2_data_freefind(d, postdup);
+               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+               INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+               INT_SET(newdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT));
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup);
+               /*
+                * If the following entry was in the table, the new entry
+                * is longer, so it will be in the table too.  Remove
+                * the old one and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+               }
+               /*
+                * Otherwise we need a scan if the new entry is big enough.
+                * Big enough means longer than the last bestfree slot.
+                */
+               else
+                       needscan = INT_GET(newdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT);
+       }
+       /*
+        * Neither neighbor is free.  Make a new entry.
+        * It is offered to the bestfree table; the insert does nothing
+        * if the entry is not longer than any slot.
+        */
+       else {
+               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+               INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+               INT_SET(newdup->length, ARCH_CONVERT, len);
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup);
+               (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+       }
+       *needscanp = needscan;
+}
+
+/*
+ * Take a byte range out of an existing unused space and make it un-free.
+ *
+ * offset and len are in bytes, with offset measured from the start of the
+ * data block; the range must lie entirely inside the unused entry dup
+ * (asserted below).  Depending on whether the range touches the front
+ * and/or the back of dup, the unused entry is dropped from the bestfree
+ * table, trimmed at one end, or split in two.  Every unused entry that is
+ * created or resized here is logged via xfs_dir2_data_log_unused().
+ *
+ * *needlogp is handed on to the bestfree insert/remove routines.
+ * *needscanp must be 0 on entry (asserted); it is set nonzero when the
+ * bestfree table was not (or could not be trusted to be) fixed up in
+ * place, so that the caller can regenerate it.
+ */
+void
+xfs_dir2_data_use_free(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* data block buffer */
+       xfs_dir2_data_unused_t  *dup,           /* unused entry */
+       xfs_dir2_data_aoff_t    offset,         /* starting offset to use */
+       xfs_dir2_data_aoff_t    len,            /* length to use */
+       int                     *needlogp,      /* out: need to log header */
+       int                     *needscanp)     /* out: need regen bestfree */
+{
+       xfs_dir2_data_t         *d;             /* data block */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
+       int                     matchback;      /* matches end of freespace */
+       int                     matchfront;     /* matches start of freespace */
+       int                     needscan;       /* need to regen bestfree */
+       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
+       xfs_dir2_data_unused_t  *newdup2;       /* another new unused entry */
+       int                     oldlen;         /* old unused entry's length */
+
+       d = bp->data;
+       ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC ||
+              INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC);
+       ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG);
+       ASSERT(offset >= (char *)dup - (char *)d);
+       ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d);
+       ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT));
+       /*
+        * Look up the entry in the bestfree table.
+        * dfp is NULL when dup is not one of the tracked entries; in that
+        * case dup may be no longer than the entry in the last bestfree
+        * slot (asserted).  oldlen is captured now because dup's length
+        * field is overwritten in some of the cases below.
+        */
+       dfp = xfs_dir2_data_freefind(d, dup);
+       oldlen = INT_GET(dup->length, ARCH_CONVERT);
+       ASSERT(dfp || oldlen <= INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT));
+       /*
+        * Check for alignment with front and back of the entry.
+        */
+       matchfront = (char *)dup - (char *)d == offset;
+       matchback = (char *)dup + oldlen - (char *)d == offset + len;
+       ASSERT(*needscanp == 0);
+       needscan = 0;
+       /*
+        * If we matched it exactly we just need to get rid of it from
+        * the bestfree table.
+        * If the last bestfree slot is in use (nonzero offset) the entry
+        * is left in the table and a rescan is requested instead --
+        * presumably because some untracked unused entry may now deserve
+        * the vacated slot.
+        */
+       if (matchfront && matchback) {
+               if (dfp) {
+                       needscan = INT_GET(d->hdr.bestfree[2].offset, ARCH_CONVERT) != 0;
+                       if (!needscan)
+                               xfs_dir2_data_freeremove(d, dfp, needlogp);
+               }
+       }
+       /*
+        * We match the first part of the entry.
+        * Make a new entry with the remaining freespace.
+        * The new entry starts right after the range being used and is
+        * oldlen - len bytes long.
+        */
+       else if (matchfront) {
+               newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+               INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+               INT_SET(newdup->length, ARCH_CONVERT, oldlen - len);
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup);
+               /*
+                * If it was in the table, remove it and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+                       ASSERT(dfp != NULL);
+                       ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
+                       ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+                       /*
+                        * If we got inserted at the last slot,
+                        * that means we don't know if there was a better
+                        * choice for the last slot, or not.  Rescan.
+                        */
+                       needscan = dfp == &d->hdr.bestfree[2];
+               }
+       }
+       /*
+        * We match the last part of the entry.
+        * Trim the allocated space off the tail of the entry.
+        * The entry keeps its start address, so only its length and its
+        * tag are rewritten.  NOTE(review): the tag pointer macro is
+        * applied after the new length is stored and presumably locates
+        * the tag from that length, so the order of the two stores
+        * matters -- confirm against the macro definition.
+        */
+       else if (matchback) {
+               newdup = dup;
+               INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
+                       (((char *)d + offset) - (char *)newdup));
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup);
+               /*
+                * If it was in the table, remove it and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(d, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+                       ASSERT(dfp != NULL);
+                       ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT));
+                       ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d);
+                       /*
+                        * If we got inserted at the last slot,
+                        * that means we don't know if there was a better
+                        * choice for the last slot, or not.  Rescan.
+                        */
+                       needscan = dfp == &d->hdr.bestfree[2];
+               }
+       }
+       /*
+        * Poking out the middle of an entry.
+        * Make two new entries.
+        * newdup is the old entry shortened to end at offset; newdup2 is
+        * a fresh entry starting at offset + len.  newdup2's length is
+        * computed from newdup's freshly stored length, so newdup must be
+        * written first.
+        */
+       else {
+               newdup = dup;
+               INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t)
+                       (((char *)d + offset) - (char *)newdup));
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup);
+               newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+               INT_SET(newdup2->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG);
+               INT_SET(newdup2->length, ARCH_CONVERT, oldlen - len - INT_GET(newdup->length, ARCH_CONVERT));
+               INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup2, ARCH_CONVERT), ARCH_CONVERT,
+                       (xfs_dir2_data_off_t)((char *)newdup2 - (char *)d));
+               xfs_dir2_data_log_unused(tp, bp, newdup2);
+               /*
+                * If the old entry was in the table, we need to scan
+                * if the 3rd entry was valid, since these entries
+                * are smaller than the old one.
+                * If we don't need to scan that means there were 1 or 2
+                * entries in the table, and removing the old and adding
+                * the 2 new will work.
+                */
+               if (dfp) {
+                       needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0;
+                       if (!needscan) {
+                               xfs_dir2_data_freeremove(d, dfp, needlogp);
+                               (void)xfs_dir2_data_freeinsert(d, newdup,
+                                       needlogp);
+                               (void)xfs_dir2_data_freeinsert(d, newdup2,
+                                       needlogp);
+                       }
+               }
+       }
+       *needscanp = needscan;
+}
diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c
new file mode 100644 (file)
index 0000000..89761db
--- /dev/null
@@ -0,0 +1,1496 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_leaf.c
+ * XFS directory version 2 implementation - single leaf form
+ * see xfs_dir2_leaf.h for data structures.
+ * These directories have multiple XFS_DIR2_DATA blocks and one
+ * XFS_DIR2_LEAF1 block containing the hash table and freespace map.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Convert a block form directory to a leaf form directory.
+ */
+int                                            /* error */
+xfs_dir2_block_to_leaf(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *dbp)           /* input block's buffer */
+{
+       xfs_dir2_data_off_t     *bestsp;        /* leaf's bestsp entries */
+       xfs_dablk_t             blkno;          /* leaf block's bno */
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
+       xfs_dir2_block_tail_t   *btp;           /* block's tail */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dabuf_t             *lbp;           /* leaf block's buffer */
+       xfs_dir2_db_t           ldb;            /* leaf block's bno */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to rescan bestfree */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_b("block_to_leaf", args, dbp);
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Add the leaf block to the inode.
+        * This interface will only put blocks in the leaf/node range.
+        * Since that's empty now, we'll get the root (block 0 in range).
+        */
+       if (error = xfs_da_grow_inode(args, &blkno)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+       ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
+       /*
+        * Initialize the leaf block, get a buffer for it.
+        */
+       if (error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(lbp != NULL);
+       leaf = lbp->data;
+       block = dbp->data;
+       xfs_dir2_data_check(dp, dbp);
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Set the counts in the leaf header.
+        */
+       INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */
+       INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */
+       /*
+        * Could compact these but I think we always do the conversion
+        * after squeezing out stale entries.
+        */
+       bcopy(blp, leaf->ents, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+       xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
+       needscan = 0;
+       needlog = 1;
+       /*
+        * Make the space formerly occupied by the leaf entries and block
+        * tail be free.
+        */
+       xfs_dir2_data_make_free(tp, dbp,
+               (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+               (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+                                      (char *)blp),
+               &needlog, &needscan);
+       /*
+        * Fix up the block header, make it a data block.
+        */
+       INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+       if (needscan)
+               xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+                       NULL);
+       /*
+        * Set up leaf tail and bests table.
+        */
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
+       bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+       INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT);
+       /*
+        * Log the data header and leaf bests table.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       xfs_dir2_leaf_check(dp, lbp);
+       xfs_dir2_data_check(dp, dbp);
+       xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
+       xfs_da_buf_done(lbp);
+       return 0;
+}
+
+/*
+ * Add an entry to a leaf form directory.
+ *
+ * Outline:
+ *  - read the leaf block and find the insertion index by hash value;
+ *  - pick a data block with room for the new entry (use_block), or
+ *    decide that a new data block has to be allocated;
+ *  - make sure the leaf block has room for one more leaf entry (and one
+ *    more bests slot if needed): compact stale entries, or convert the
+ *    directory to node form and let xfs_dir2_node_addname() do the add;
+ *  - carve the entry out of the data block's biggest free region;
+ *  - insert the leaf entry, reusing a nearby stale slot if there is one.
+ *
+ * Returns 0 or an error.  ENOSPC is returned when only checking
+ * (args->justcheck) or when there is no space reservation
+ * (args->total == 0) and the add would need a new data block or a
+ * conversion to node form.
+ */
+int                                            /* error */
+xfs_dir2_leaf_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dir2_data_off_t     *bestsp;        /* freespace table in leaf */
+       int                     compact;        /* need to compact leaves */
+       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* data unused entry */
+       int                     error;          /* error return value */
+       int                     grown;          /* allocated new data block */
+       int                     highstale;      /* index of next stale leaf */
+       int                     i;              /* temporary, index */
+       int                     index;          /* leaf table position */
+       xfs_dabuf_t             *lbp;           /* leaf's buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     length;         /* length of new entry */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
+       int                     lfloglow;       /* low leaf logging index */
+       int                     lfloghigh;      /* high leaf logging index */
+       int                     lowstale;       /* index of prev stale leaf */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needbytes;      /* leaf block bytes needed */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data free */
+       xfs_dir2_data_off_t     *tagp;          /* end of data entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       xfs_dir2_db_t           use_block;      /* data block number */
+
+       xfs_dir2_trace_args("leaf_addname", args);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       /*
+        * Read the leaf block.
+        */
+       error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+               XFS_DATA_FORK);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(lbp != NULL);
+       /*
+        * Look up the entry by hash value and name.
+        * We know it's not there, our caller has already done a lookup.
+        * So the index is of the entry to insert in front of.
+        * But if there are dup hash values the index is of the first of those.
+        */
+       index = xfs_dir2_leaf_search_hash(args, lbp);
+       leaf = lbp->data;
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+       length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+       /*
+        * See if there are any entries with the same hash value
+        * and space in their block for the new entry.
+        * This is good because it puts multiple same-hash value entries
+        * in a data block, improving the lookup of those entries.
+        * Note that index and lep advance together through this loop and
+        * keep whatever position the loop stops at; that position is the
+        * insertion point used further down.
+        */
+       for (use_block = -1, lep = &leaf->ents[index];
+            index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+            index++, lep++) {
+               if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+               ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT));
+               ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF);
+               if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+                       use_block = i;
+                       break;
+               }
+       }
+       /*
+        * Didn't find a block yet, linear search all the data blocks.
+        */
+       if (use_block == -1) {
+               for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
+                       /*
+                        * Remember a block we see that's missing.
+                        * NOTE(review): once one missing block has been
+                        * remembered, a later missing slot reaches the
+                        * else-if below; if NULLDATAOFF compares >= length
+                        * the search stops at that slot instead of looking
+                        * at the remaining blocks -- confirm whether that
+                        * is intended.
+                        */
+                       if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1)
+                               use_block = i;
+                       else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
+                               use_block = i;
+                               break;
+                       }
+               }
+       }
+       /*
+        * How many bytes do we need in the leaf block?
+        * A leaf entry, unless a stale one can be reused; plus a bests
+        * slot, unless a slot (in use or missing) was found above.
+        */
+       needbytes =
+               (INT_GET(leaf->hdr.stale, ARCH_CONVERT) != 0 ? 0 : (uint)sizeof(leaf->ents[0])) +
+               (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
+       /*
+        * Now kill use_block if it refers to a missing block, so we
+        * can use it as an indication of allocation needed.
+        */
+       if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF)
+               use_block = -1;
+       /*
+        * If we don't have enough free bytes but we can make enough
+        * by compacting out stale entries, we'll do that.
+        * The free bytes are the gap between the end of the leaf entries
+        * and the start of the bests table.
+        */
+       if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes &&
+           INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) {
+#pragma mips_frequency_hint NEVER
+               compact = 1;
+       }
+       /*
+        * Otherwise if we don't have enough free bytes we need to
+        * convert to node form.
+        */
+       else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <
+                needbytes) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Just checking or no space reservation, give up.
+                */
+               if (args->justcheck || args->total == 0) {
+                       xfs_da_brelse(tp, lbp);
+                       return XFS_ERROR(ENOSPC);
+               }
+               /*
+                * Convert to node form.
+                */
+               error = xfs_dir2_leaf_to_node(args, lbp);
+               xfs_da_buf_done(lbp);
+               if (error)
+                       return error;
+               /*
+                * Then add the new entry.
+                */
+               return xfs_dir2_node_addname(args);
+       }
+       /*
+        * Otherwise it will fit without compaction.
+        */
+       else
+               compact = 0;
+       /*
+        * If just checking, then it will fit unless we needed to allocate
+        * a new data block.
+        */
+       if (args->justcheck) {
+               xfs_da_brelse(tp, lbp);
+               return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
+       }
+       /*
+        * If no allocations are allowed, return now before we've
+        * changed anything.
+        */
+       if (args->total == 0 && use_block == -1) {
+#pragma mips_frequency_hint NEVER
+               xfs_da_brelse(tp, lbp);
+               return XFS_ERROR(ENOSPC);
+       }
+       /*
+        * Need to compact the leaf entries, removing stale ones.
+        * Leave one stale entry behind - the one closest to our
+        * insertion index - and we'll shift that one to our insertion
+        * point later.
+        * The call updates index and fills in lowstale, highstale and
+        * the logging range through the pointers passed to it.
+        */
+       if (compact) {
+#pragma mips_frequency_hint NEVER
+               xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
+                       &lfloglow, &lfloghigh);
+       }
+       /*
+        * There are stale entries but we did not compact.
+        * Start the logging range at impossible values (low past the last
+        * entry, high before the first) so that the MIN/MAX updates made
+        * when the stale slot is reused below yield the real range.
+        */
+       else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
+               lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               lfloghigh = -1;
+       }
+       /*
+        * If there was no data block space found, we need to allocate
+        * a new one.
+        */
+       if (use_block == -1) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Add the new data block.
+                */
+               if (error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
+                               &use_block)) {
+                       xfs_da_brelse(tp, lbp);
+                       return error;
+               }
+               /*
+                * Initialize the block.
+                */
+               if (error = xfs_dir2_data_init(args, use_block, &dbp)) {
+                       xfs_da_brelse(tp, lbp);
+                       return error;
+               }
+               /*
+                * If we're adding a new data block on the end we need to
+                * extend the bests table.  Copy it up one entry.
+                * The table grows toward lower addresses: bestsp is moved
+                * back one slot and the existing entries are shifted down
+                * into it, leaving the last slot for the new block.
+                */
+               if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
+                       bestsp--;
+                       ovbcopy(&bestsp[1], &bestsp[0],
+                               INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
+                       INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
+                       xfs_dir2_leaf_log_tail(tp, lbp);
+                       xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+               }
+               /*
+                * If we're filling in a previously empty block just log it.
+                */
+               else
+                       xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
+               data = dbp->data;
+               INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+               grown = 1;
+       }
+       /*
+        * Already had space in some data block.
+        * Just read that one in.
+        */
+       else {
+               if (error =
+                   xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
+                           -1, &dbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                       xfs_da_brelse(tp, lbp);
+                       return error;
+               }
+               data = dbp->data;
+               grown = 0;
+       }
+       xfs_dir2_data_check(dp, dbp);
+       /*
+        * Point to the biggest freespace in our data block.
+        */
+       dup = (xfs_dir2_data_unused_t *)
+             ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+       ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length);
+       needscan = needlog = 0;
+       /*
+        * Mark the initial part of our freespace in use for the new entry.
+        */
+       xfs_dir2_data_use_free(tp, dbp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+               &needlog, &needscan);
+       /*
+        * Initialize our new entry (at last).
+        * It occupies the start of what was the free region; the tag at
+        * its end records the entry's own offset within the data block.
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+       dep->namelen = args->namelen;
+       bcopy(args->name, dep->name, dep->namelen);
+       tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+       INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+       /*
+        * Need to rescan to fix up the bestfree table.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+       /*
+        * Need to log the data block's header.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       xfs_dir2_data_log_entry(tp, dbp, dep);
+       /*
+        * If the bests table needs to be changed, do it.
+        * Log the change unless we've already done that.
+        */
+       if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+               INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
+               if (!grown)
+                       xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
+       }
+       /*
+        * Now we need to make room to insert the leaf entry.
+        * If there are no stale entries, we just insert a hole at index.
+        */
+       if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) {
+               /*
+                * lep is still good as the index leaf entry.
+                * (Compaction always leaves a stale entry behind, so
+                * index has not been changed by it on this path.)
+                */
+               if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+                       ovbcopy(lep, lep + 1,
+                               (INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+               /*
+                * Record low and high logging indices for the leaf.
+                */
+               lfloglow = index;
+               lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+       }
+       /*
+        * There are stale entries.
+        * We will use one of them for the new entry.
+        * It's probably not at the right location, so we'll have to
+        * shift some up or down first.
+        */
+       else {
+               /*
+                * If we didn't compact before, we need to find the nearest
+                * stale entries before and after our insertion point.
+                */
+               if (compact == 0) {
+                       /*
+                        * Find the first stale entry before the insertion
+                        * point, if any.
+                        * lowstale ends up at -1 if there is none.
+                        */
+                       for (lowstale = index - 1;
+                            lowstale >= 0 &&
+                               INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+                               XFS_DIR2_NULL_DATAPTR;
+                            lowstale--)
+                               continue;
+                       /*
+                        * Find the next stale entry at or after the insertion
+                        * point, if any.   Stop if we go so far that the
+                        * lowstale entry would be better.
+                        */
+                       for (highstale = index;
+                            highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+                               INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+                               XFS_DIR2_NULL_DATAPTR &&
+                               (lowstale < 0 ||
+                                index - lowstale - 1 >= highstale - index);
+                            highstale++)
+                               continue;
+               }
+               /*
+                * If the low one is better, use it.
+                * "Better" means it exists and either no stale entry was
+                * found at or after index, or fewer entries have to be
+                * moved to reach it.
+                */
+               if (lowstale >= 0 &&
+                   (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+                    index - lowstale - 1 < highstale - index)) {
+                       ASSERT(index - lowstale - 1 >= 0);
+                       ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+                              XFS_DIR2_NULL_DATAPTR);
+                       /*
+                        * Copy entries up to cover the stale entry
+                        * and make room for the new entry.
+                        * The new entry then goes in at index - 1.
+                        */
+                       if (index - lowstale - 1 > 0)
+                               ovbcopy(&leaf->ents[lowstale + 1],
+                                       &leaf->ents[lowstale],
+                                       (index - lowstale - 1) * sizeof(*lep));
+                       lep = &leaf->ents[index - 1];
+                       lfloglow = MIN(lowstale, lfloglow);
+                       lfloghigh = MAX(index - 1, lfloghigh);
+               }
+               /*
+                * The high one is better, so use that one.
+                */
+               else {
+                       ASSERT(highstale - index >= 0);
+                       ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+                              XFS_DIR2_NULL_DATAPTR);
+                       /*
+                        * Copy entries down to cover the stale entry
+                        * and make room for the new entry.
+                        * The new entry then goes in at index.
+                        */
+                       if (highstale - index > 0)
+                               ovbcopy(&leaf->ents[index],
+                                       &leaf->ents[index + 1],
+                                       (highstale - index) * sizeof(*lep));
+                       lep = &leaf->ents[index];
+                       lfloglow = MIN(index, lfloglow);
+                       lfloghigh = MAX(highstale, lfloghigh);
+               }
+               INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+       }
+       /*
+        * Fill in the new leaf entry.
+        * Its address encodes the data block number together with the
+        * entry's offset in that block, read back from the entry's tag.
+        */
+       INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
+       INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT)));
+       /*
+        * Log the leaf fields and give up the buffers.
+        */
+       xfs_dir2_leaf_log_header(tp, lbp);
+       xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
+       xfs_dir2_leaf_check(dp, lbp);
+       xfs_da_buf_done(lbp);
+       xfs_dir2_data_check(dp, dbp);
+       xfs_da_buf_done(dbp);
+       return 0;
+}
+
+
+#ifdef DEBUG
+/*
+ * Check the internal consistency of a leaf1 block.
+ * Pop an assert if something is wrong.
+ *
+ * Compiled only when DEBUG is defined.  The checks are: the block's magic
+ * number, the entry count against XFS_DIR2_MAX_LEAF_ENTS(), that the leaf
+ * entry array stops at or before the start of the bests table, that hash
+ * values never decrease from one entry to the next, and that the header's
+ * stale count equals the number of entries whose address is
+ * XFS_DIR2_NULL_DATAPTR.  Nothing in the block is modified.
+ */
+void
+xfs_dir2_leaf_check(
+       xfs_inode_t             *dp,            /* incore directory inode */
+       xfs_dabuf_t             *bp)            /* leaf's buffer */
+{
+       int                     i;              /* leaf index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     stale;          /* count of stale leaves */
+
+       leaf = bp->data;
+       mp = dp->i_mount;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       /*
+        * This value is not restrictive enough.
+        * Should factor in the size of the bests table as well.
+        * We can deduce a value for that from di_size.
+        */
+       ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       /*
+        * Leaves and bests don't overlap.
+        * The address just past the last leaf entry must not be beyond
+        * the first bests slot.
+        */
+       ASSERT((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <=
+              (char *)XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT));
+       /*
+        * Check hash value order, count stale entries.
+        * Each entry is compared with its successor, so the last entry
+        * is skipped for the ordering check; equal hash values are
+        * allowed.
+        */
+       for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
+               if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+                       ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
+                              INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
+               if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       stale++;
+       }
+       ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale);
+}
+#endif /* DEBUG */
+
+/*
+ * Compact out any stale entries in the leaf.
+ * Log the header and changed leaf entries, if any.
+ */
+void
+xfs_dir2_leaf_compact(
+       xfs_da_args_t   *args,          /* operation arguments */
+       xfs_dabuf_t     *bp)            /* leaf buffer */
+{
+       int             from;           /* source leaf index */
+       xfs_dir2_leaf_t *leaf;          /* leaf structure */
+       int             loglow;         /* first leaf entry to log */
+       int             to;             /* target leaf index */
+
+       leaf = bp->data;
+       if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) {
+#pragma mips_frequency_hint NEVER
+               return;
+       }
+       /*
+        * Compress out the stale entries in place.
+        */
+       for (from = to = 0, loglow = -1; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+               if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Only actually copy the entries that are different.
+                */
+               if (from > to) {
+                       if (loglow == -1)
+                               loglow = to;
+                       leaf->ents[to] = leaf->ents[from];
+               }
+               to++;
+       }
+       /*
+        * Update and log the header, log the leaf entries.
+        */
+       ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == from - to);
+       INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT)));
+       INT_SET(leaf->hdr.stale, ARCH_CONVERT, 0);
+       xfs_dir2_leaf_log_header(args->trans, bp);
+       if (loglow != -1)
+               xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
+}
+
+/*
+ * Compact the leaf entries, removing stale ones.
+ * Leave one stale entry behind - the one closest to our
+ * insertion index - and the caller will shift that one to our insertion
+ * point later.
+ *
+ * On return:
+ *   *indexp     is the insertion index translated to the compacted table;
+ *   *lowstalep  is the kept stale entry's new index if it lies before the
+ *               new insertion index, otherwise -1;
+ *   *highstalep is the kept stale entry's new index if it lies at or after
+ *               the new insertion index, otherwise the new entry count;
+ *   *lowlogp    is the slot of the first stale entry that was removed;
+ *   *highlogp   is the last valid slot of the compacted table.
+ * The header count and stale fields are updated here but nothing is
+ * logged by this routine.  The header must show more than one stale
+ * entry on entry (asserted).
+ */
+void
+xfs_dir2_leaf_compact_x1(
+       xfs_dabuf_t     *bp,            /* leaf buffer */
+       int             *indexp,        /* in/out: insertion index */
+       int             *lowstalep,     /* out: stale entry before us */
+       int             *highstalep,    /* out: stale entry after us */
+       int             *lowlogp,       /* out: low log index */
+       int             *highlogp)      /* out: high log index */
+{
+       int             from;           /* source copy index */
+       int             highstale;      /* stale entry at/after index */
+       int             index;          /* insertion index */
+       int             keepstale;      /* source index of kept stale */
+       xfs_dir2_leaf_t *leaf;          /* leaf structure */
+       int             lowstale;       /* stale entry before index */
+       int             newindex;       /* new insertion index */
+       int             to;             /* destination copy index */
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1);
+       index = *indexp;
+       /*
+        * Scan downward from index - 1 for the nearest stale entry.
+        * lowstale ends up -1 if there is none.
+        */
+       for (lowstale = index - 1;
+            lowstale >= 0 &&
+               INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
+            lowstale--)
+               continue;
+       /*
+        * Scan upward from index for the nearest stale entry.
+        * Stop early once the distance is no better than lowstale's;
+        * highstale ends up equal to the entry count if we run off the end.
+        */
+       for (highstale = index;
+            highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+               INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
+               (lowstale < 0 || index - lowstale > highstale - index);
+            highstale++)
+               continue;
+       /*
+        * Pick the better of lowstale and highstale; ties go to lowstale.
+        */
+       if (lowstale >= 0 &&
+           (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+            index - lowstale <= highstale - index))
+               keepstale = lowstale;
+       else
+               keepstale = highstale;
+       /*
+        * Copy the entries in place, removing all the stale entries
+        * except keepstale.
+        */
+       for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
+               /*
+                * Notice the new value of index.
+                */
+               if (index == from)
+                       newindex = to;
+               if (from != keepstale &&
+                   INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
+                       /*
+                        * from == to holds only until the first entry is
+                        * dropped, so this records the first changed slot.
+                        */
+                       if (from == to)
+                               *lowlogp = to;
+                       continue;
+               }
+               /*
+                * Record the new keepstale value for the insertion.
+                * Both lowstale and highstale now name the kept entry;
+                * one of them is discarded below.
+                */
+               if (from == keepstale)
+                       lowstale = highstale = to;
+               /*
+                * Copy only the entries that have moved.
+                */
+               if (from > to)
+                       leaf->ents[to] = leaf->ents[from];
+               to++;
+       }
+       /*
+        * At least one entry must have been removed.
+        */
+       ASSERT(from > to);
+       /*
+        * If the insertion point was past the last entry,
+        * set the new insertion point accordingly.
+        */
+       if (index == from)
+               newindex = to;
+       *indexp = newindex;
+       /*
+        * Adjust the leaf header values: from - to entries were dropped
+        * and exactly one stale entry remains.
+        */
+       INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to));
+       INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1);
+       /*
+        * Remember the low/high stale value only in the "right"
+        * direction: a kept entry at or after the insertion point is
+        * reported through highstale only, one before it through
+        * lowstale only.
+        */
+       if (lowstale >= newindex)
+               lowstale = -1;
+       else
+               highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       *highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
+       *lowstalep = lowstale;
+       *highstalep = highstale;
+}
+
+/*
+ * Initialize a new leaf block, leaf1 or leafn magic accepted.
+ */
+int
+xfs_dir2_leaf_init(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dir2_db_t           bno,            /* directory block number */
+       xfs_dabuf_t             **bpp,          /* out: leaf buffer */
+       int                     magic)          /* magic number for block */
+{
+       xfs_dabuf_t             *bp;            /* leaf buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       ASSERT(dp != NULL);
+       tp = args->trans;
+       mp = dp->i_mount;
+       ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
+              bno < XFS_DIR2_FREE_FIRSTDB(mp));
+       /*
+        * Get the buffer for the block.
+        */
+       error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp,
+               XFS_DATA_FORK);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(bp != NULL);
+       leaf = bp->data;
+       /*
+        * Initialize the header.
+        */
+       INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic);
+       INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT);
+        INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT);
+       INT_ZERO(leaf->hdr.count, ARCH_CONVERT);
+       INT_ZERO(leaf->hdr.stale, ARCH_CONVERT);
+       xfs_dir2_leaf_log_header(tp, bp);
+       /*
+        * If it's a leaf-format directory initialize the tail.
+        * In this case our caller has the real bests table to copy into
+        * the block.
+        */
+       if (magic == XFS_DIR2_LEAF1_MAGIC) {
+               ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+               INT_SET(ltp->bestcount, ARCH_CONVERT, 0);
+               xfs_dir2_leaf_log_tail(tp, bp);
+       }
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Log a run of bests-table entries, first through last inclusive,
+ * in a leaf1 block.
+ */
+void
+xfs_dir2_leaf_log_bests(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* leaf buffer */
+       int                     first,          /* first entry to log */
+       int                     last)           /* last entry to log */
+{
+       xfs_dir2_data_off_t     *bests;         /* start of the bests table */
+       uint                    hi;             /* last byte offset to log */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       uint                    lo;             /* first byte offset to log */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
+       bests = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+       /*
+        * Convert the entry range into a byte range within the block;
+        * the end offset names the final byte of the last entry.
+        */
+       lo = (uint)((char *)(bests + first) - (char *)leaf);
+       hi = (uint)((char *)(bests + last) - (char *)leaf + sizeof(*bests) - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log a run of leaf entries, first through last inclusive,
+ * in a leaf1 or leafn block.
+ */
+void
+xfs_dir2_leaf_log_ents(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* leaf buffer */
+       int                     first,          /* first entry to log */
+       int                     last)           /* last entry to log */
+{
+       uint                    hi;             /* last byte offset to log */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       uint                    lo;             /* first byte offset to log */
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+              INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Convert the entry range into a byte range within the block;
+        * the end offset names the final byte of the last entry.
+        */
+       lo = (uint)((char *)&leaf->ents[first] - (char *)leaf);
+       hi = (uint)((char *)&leaf->ents[last] - (char *)leaf +
+                   sizeof(leaf->ents[last]) - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the whole header of a leaf1 or leafn block.
+ */
+void
+xfs_dir2_leaf_log_header(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp)            /* leaf buffer */
+{
+       uint                    hi;             /* last byte offset to log */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       uint                    lo;             /* first byte offset to log */
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+              INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       lo = (uint)((char *)&leaf->hdr - (char *)leaf);
+       hi = (uint)(sizeof(leaf->hdr) - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the tail structure of a leaf1 block: everything from the tail
+ * pointer through the last byte of the directory block.
+ */
+void
+xfs_dir2_leaf_log_tail(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp)            /* leaf buffer */
+{
+       uint                    hi;             /* last byte offset to log */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       uint                    lo;             /* first byte offset to log */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       leaf = bp->data;
+       mp = tp->t_mountp;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       lo = (uint)((char *)ltp - (char *)leaf);
+       hi = (uint)(mp->m_dirblksize - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Look up the entry referred to by args in the leaf format directory.
+ * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
+ * is also used by the node-format code.
+ */
+int
+xfs_dir2_leaf_lookup(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* found entry index */
+       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args("leaf_lookup", args);
+       /*
+        * Look up name in the leaf block, returning both buffers and index.
+        */
+       if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       tp = args->trans;
+       dp = args->dp;
+       xfs_dir2_leaf_check(dp, lbp);
+       leaf = lbp->data;
+       /*
+        * Get to the leaf entry and contained data entry address.
+        */
+       lep = &leaf->ents[index];
+       /*
+        * Point to the data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)dbp->data +
+              XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
+       /*
+        * Return the found inode number.
+        */
+       args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+       xfs_da_brelse(tp, dbp);
+       xfs_da_brelse(tp, lbp);
+       return XFS_ERROR(EEXIST);
+}
+
+/*
+ * Look up name/hash in the leaf block.
+ * Fill in indexp with the found index, and dbpp with the data buffer.
+ * If not found dbpp will be NULL, and ENOENT comes back.
+ * lbpp will always be filled in with the leaf buffer unless there's an error.
+ */
+STATIC int                                     /* error */
+xfs_dir2_leaf_lookup_int(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             **lbpp,         /* out: leaf buffer */
+       int                     *indexp,        /* out: index in leaf block */
+       xfs_dabuf_t             **dbpp)         /* out: data buffer */
+{
+       xfs_dir2_db_t           curdb;          /* current data block number */
+       xfs_dabuf_t             *dbp;           /* data buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* index in leaf block */
+       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_db_t           newdb;          /* new data block number */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       /*
+        * Read the leaf block into the buffer.
+        */
+       if (error =
+           xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+                   XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       *lbpp = lbp;
+       leaf = lbp->data;
+       xfs_dir2_leaf_check(dp, lbp);
+       /*
+        * Look for the first leaf entry with our hash value.
+        */
+       index = xfs_dir2_leaf_search_hash(args, lbp);
+       /*
+        * Loop over all the entries with the right hash value
+        * looking to match the name.
+        */
+       for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
+            index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+            lep++, index++) {
+               /*
+                * Skip over stale leaf entries.
+                */
+               if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Get the new data block number.
+                */
+               newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+               /*
+                * If it's not the same as the old data block number,
+                * need to pitch the old one and read the new one.
+                */
+               if (newdb != curdb) {
+                       if (dbp)
+                               xfs_da_brelse(tp, dbp);
+                       if (error =
+                           xfs_da_read_buf(tp, dp,
+                                   XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
+                                   XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                               xfs_da_brelse(tp, lbp);
+                               return error;
+                       }
+                       xfs_dir2_data_check(dp, dbp);
+                       curdb = newdb;
+               }
+               /*
+                * Point to the data entry.
+                */
+               dep = (xfs_dir2_data_entry_t *)
+                     ((char *)dbp->data +
+                      XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+               /*
+                * If it matches then return it.
+                */
+               if (dep->namelen == args->namelen &&
+                   dep->name[0] == args->name[0] &&
+                   bcmp(dep->name, args->name, args->namelen) == 0) {
+                       *dbpp = dbp;
+                       *indexp = index;
+                       return 0;
+               }
+       }
+       /*
+        * No match found, return ENOENT.
+        */
+       ASSERT(args->oknoent);
+       if (dbp)
+               xfs_da_brelse(tp, dbp);
+       xfs_da_brelse(tp, lbp);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove an entry from a leaf format directory.
+ * The name is looked up, its data entry is turned into free space, and
+ * its leaf entry is marked stale.  If that empties the data block the
+ * block is removed from the directory and the leaf's bests table is
+ * adjusted.  Finally xfs_dir2_leaf_to_block is called to see whether
+ * the directory can go back to block form; its result is returned.
+ * A lookup failure, or a failure to remove the emptied data block, is
+ * returned directly, except that ENOSPC with args->total == 0 is
+ * treated as success and the empty block is left in place.
+ */
+int                                            /* error */
+xfs_dir2_leaf_removename(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dir2_data_off_t     *bestsp;        /* leaf block best freespace */
+       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_db_t           db;             /* data block number */
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry structure */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dir2_db_t           i;              /* temporary data block # */
+       int                     index;          /* index into leaf entries */
+       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       xfs_dir2_data_off_t     oldbest;        /* old value of best free */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args("leaf_removename", args);
+       /*
+        * Lookup the leaf entry, get the leaf and data blocks read in.
+        */
+       if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = lbp->data;
+       data = dbp->data;
+       xfs_dir2_data_check(dp, dbp);
+       /*
+        * Point to the leaf entry, use that to point to the data entry.
+        */
+       lep = &leaf->ents[index];
+       db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+       needscan = needlog = 0;
+       /*
+        * Remember the block's longest free extent before we change it;
+        * the leaf's bests entry for this block must currently agree.
+        */
+       oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+       ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest);
+       /*
+        * Mark the former data entry unused.
+        */
+       xfs_dir2_data_make_free(tp, dbp,
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
+               XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+       /*
+        * We just mark the leaf entry stale by putting a null in it.
+        * The stale count in the header goes up by one; the entry itself
+        * stays in the table.
+        */
+       INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+       xfs_dir2_leaf_log_header(tp, lbp);
+       INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+       xfs_dir2_leaf_log_ents(tp, lbp, index, index);
+       /*
+        * Scan the freespace in the data block again if necessary,
+        * log the data block header if necessary.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       /*
+        * If the longest freespace in the data block has changed,
+        * put the new value in the bests table and log that.
+        */
+       if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) {
+               INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT);
+               xfs_dir2_leaf_log_bests(tp, lbp, db, db);
+       }
+       xfs_dir2_data_check(dp, dbp);
+       /*
+        * If the data block is now empty then get rid of the data block.
+        * "Empty" means the longest free extent covers everything after
+        * the data block header.
+        */
+       if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+           mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(db != mp->m_dirdatablk);
+               if (error = xfs_dir2_shrink_inode(args, db, dbp)) {
+                       /*
+                        * Nope, can't get rid of it because it caused
+                        * allocation of a bmap btree block to do so.
+                        * Just go on, returning success, leaving the
+                        * empty block in place.
+                        */
+                       if (error == ENOSPC && args->total == 0) {
+                               xfs_da_buf_done(dbp);
+                               error = 0;
+                       }
+                       xfs_dir2_leaf_check(dp, lbp);
+                       xfs_da_buf_done(lbp);
+                       return error;
+               }
+               /*
+                * The data block is gone; don't pass its buffer on.
+                */
+               dbp = NULL;
+               /*
+                * If this is the last data block then compact the
+                * bests table by getting rid of entries.
+                */
+               if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) {
+                       /*
+                        * Look for the last active entry (i).
+                        * The scan stops at 0 without testing entry 0.
+                        */
+                       for (i = db - 1; i > 0; i--) {
+                               if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF)
+                                       break;
+                       }
+                       /*
+                        * Copy the table down so inactive entries at the
+                        * end are removed.
+                        * NOTE(review): bestsp was computed from the old
+                        * bestcount; the surviving entries are shifted up
+                        * by (db - i) slots, presumably because the table
+                        * ends at the tail structure - confirm against
+                        * XFS_DIR2_LEAF_BESTS_P_ARCH.
+                        */
+                       ovbcopy(bestsp, &bestsp[db - i],
+                               (INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
+                       INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
+                       xfs_dir2_leaf_log_tail(tp, lbp);
+                       xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+               } else
+                       INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF);
+       }
+       /*
+        * If the data block was not the first one, drop it.
+        * The first data block's buffer is kept and handed to
+        * xfs_dir2_leaf_to_block below.
+        */
+       else if (db != mp->m_dirdatablk && dbp != NULL) {
+               xfs_da_buf_done(dbp);
+               dbp = NULL;
+       }
+       xfs_dir2_leaf_check(dp, lbp);
+       /*
+        * See if we can convert to block form.
+        */
+       return xfs_dir2_leaf_to_block(args, lbp, dbp);
+}
+
+/*
+ * Replace the inode number in a leaf format directory entry.
+ */
+int                                            /* error */
+xfs_dir2_leaf_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* index of leaf entry */
+       xfs_dabuf_t             *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args("leaf_replace", args);
+       /*
+        * Look up the entry.
+        */
+       if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       dp = args->dp;
+       leaf = lbp->data;
+       /*
+        * Point to the leaf entry, get data address from it.
+        */
+       lep = &leaf->ents[index];
+       /*
+        * Point to the data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)dbp->data +
+              XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
+       ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT));
+       /*
+        * Put the new inode number in, log it.
+        */
+       INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+       tp = args->trans;
+       xfs_dir2_data_log_entry(tp, dbp, dep);
+       xfs_da_buf_done(dbp);
+       xfs_dir2_leaf_check(dp, lbp);
+       xfs_da_brelse(tp, lbp);
+       return 0;
+}
+
+/*
+ * Return index in the leaf block (lbp) which is either the first
+ * one with this hash value, or if there are none, the insert point
+ * for that hash value.
+ */
+int                                            /* index value */
+xfs_dir2_leaf_search_hash(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *lbp)           /* leaf buffer */
+{
+       xfs_dahash_t            hash;           /* hash from this entry */
+       xfs_dahash_t            hashwant;       /* hash value looking for */
+       int                     high;           /* high leaf index */
+       int                     low;            /* low leaf index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     mid;            /* current leaf index */
+
+       leaf = lbp->data;
+#ifndef __KERNEL__
+       if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0)
+               return 0;
+#endif
+       /*
+        * Note, the table cannot be empty, so we have to go through the loop.
+        * Binary search the leaf entries looking for our hash value.
+        */
+       for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1,
+               hashwant = args->hashval;
+            low <= high; ) {
+               mid = (low + high) >> 1;
+               if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant)
+                       break;
+               if (hash < hashwant)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+       }
+       /*
+        * Found one, back up through all the equal hash values.
+        */
+       if (hash == hashwant) {
+               while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) {
+#pragma mips_frequency_hint NEVER
+                       mid--;
+               }
+       }
+       /*
+        * Need to point to an entry higher than ours.
+        */
+       else if (hash < hashwant)
+               mid++;
+       return mid;
+}
+
+/*
+ * Trim off a trailing data block.  We know it's empty since the leaf
+ * freespace table says so.
+ */
+int                                            /* error */
+xfs_dir2_leaf_trim_data(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *lbp,           /* leaf buffer */
+       xfs_dir2_db_t           db)             /* data block number */
+{
+       xfs_dir2_data_off_t     *bestsp;        /* leaf bests table */
+#ifdef DEBUG
+       xfs_dir2_data_t         *data;          /* data block structure */
+#endif
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Read the offending data block.  We need its buffer.
+        */
+       if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+#ifdef DEBUG
+       data = dbp->data;
+       ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+#endif
+       /* this seems to be an error 
+        * data is only valid if DEBUG is defined?
+        * RMC 09/08/1999
+        */
+
+       leaf = lbp->data;
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+              mp->m_dirblksize - (uint)sizeof(data->hdr));
+       ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+       /*
+        * Get rid of the data block.
+        */
+       if (error = xfs_dir2_shrink_inode(args, db, dbp)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(error != ENOSPC);
+               xfs_da_brelse(tp, dbp);
+               return error;
+       }
+       /*
+        * Eliminate the last bests entry from the table.
+        */
+       bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+       INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
+       ovbcopy(&bestsp[0], &bestsp[1], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+       xfs_dir2_leaf_log_tail(tp, lbp);
+       xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+       return 0;
+}
+
+/*
+ * Convert node form directory to leaf form directory.
+ * The root of the node form dir needs to already be a LEAFN block.
+ * Just return if we can't do anything.
+ *
+ * Every "can't convert" exit below returns 0, so a zero return does not
+ * by itself mean that a conversion happened.  A nonzero return is the
+ * error handed back by one of the helpers called here.  When the function
+ * gets as far as calling xfs_dir2_leaf_to_block(), state->path.blk[0].bp
+ * is set to NULL before returning that call's result.
+ */
+int                                            /* error */
+xfs_dir2_node_to_leaf(
+       xfs_da_state_t          *state)         /* directory operation state */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dabuf_t             *fbp;           /* buffer for freespace block */
+       xfs_fileoff_t           fo;             /* freespace file offset */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       xfs_dabuf_t             *lbp;           /* buffer for leaf block */
+       xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     rval;           /* successful free trim? */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       /*
+        * There's more than a leaf level in the btree, so there must
+        * be multiple leafn blocks.  Give up.
+        */
+       if (state->path.active > 1)
+               return 0;
+       args = state->args;
+       xfs_dir2_trace_args("node_to_leaf", args);
+       mp = state->mp;
+       dp = args->dp;
+       tp = args->trans;
+       /*
+        * Get the last offset in the file, then step fo back by
+        * m_dirblkfsbs so it can be compared against m_dirfreeblk below.
+        */
+       if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       fo -= mp->m_dirblkfsbs;
+       /*
+        * If there are freespace blocks other than the first one,
+        * take this opportunity to remove trailing empty freespace blocks
+        * that may have been left behind during no-space-reservation
+        * operations.
+        * Each successful trim (rval != 0) moves fo back by another
+        * m_dirblkfsbs; the first unsuccessful trim ends the conversion
+        * attempt with a return of 0.
+        */
+       while (fo > mp->m_dirfreeblk) {
+               if (error = xfs_dir2_node_trim_free(args, fo, &rval)) {
+#pragma mips_frequency_hint NEVER
+                       return error;
+               }
+               if (rval)
+                       fo -= mp->m_dirblkfsbs;
+               else
+                       return 0;
+       }
+       /*
+        * Now find the block just before the freespace block.
+        */
+       if (error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       /*
+        * If it's not the single leaf block, give up.
+        * The test is on the byte offset of fo against
+        * XFS_DIR2_LEAF_OFFSET plus one directory block.
+        */
+       if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
+               return 0;
+       lbp = state->path.blk[0].bp;
+       leaf = lbp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Read the freespace block.
+        */
+       if (error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
+                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       free = fbp->data;
+       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       ASSERT(INT_ISZERO(free->hdr.firstdb, ARCH_CONVERT));
+       /*
+        * Now see if the leafn and free data will fit in a leaf1.
+        * If not, release the buffer and give up.
+        * Space needed: the leaf header, one entry per live leaf entry
+        * (count - stale), one bests slot per free->hdr.nvalid, and the
+        * leaf tail; all of that must not exceed m_dirblksize.
+        */
+       if ((uint)sizeof(leaf->hdr) +
+           (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
+           INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) +
+           (uint)sizeof(leaf->tail) >
+           mp->m_dirblksize) {
+               xfs_da_brelse(tp, fbp);
+               return 0;
+       }
+       /*
+        * If the leaf has any stale entries in it, compress them out.
+        * The compact routine will log the header.
+        * Without stale entries the header is logged here instead; in
+        * both cases the magic number is switched to LEAF1 right after.
+        */
+       if (INT_GET(leaf->hdr.stale, ARCH_CONVERT))
+               xfs_dir2_leaf_compact(args, lbp);
+       else
+               xfs_dir2_leaf_log_header(tp, lbp);
+       INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC);
+       /*
+        * Set up the leaf tail from the freespace block.
+        * bestcount is taken over from the free block's nvalid.
+        */
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT);
+       /*
+        * Set up the leaf bests table.
+        * The bests[] array of the free block is copied as-is, then the
+        * copied range and the tail are logged.
+        */
+       bcopy(free->bests, XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT),
+               INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
+       xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+       xfs_dir2_leaf_log_tail(tp, lbp);
+       xfs_dir2_leaf_check(dp, lbp);
+       /*
+        * Get rid of the freespace block.
+        */
+       error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * This can't fail here because it can only happen when
+                * punching out the middle of an extent, and this is an
+                * isolated block.
+                */
+               ASSERT(error != ENOSPC);
+               return error;
+       }
+       fbp = NULL;     /* NOTE(review): presumably shrink_inode disposed of the buffer - confirm */
+       /*
+        * Now see if we can convert the single-leaf directory
+        * down to a block form directory.
+        * This routine always kills the dabuf for the leaf, so
+        * eliminate it from the path.
+        */
+       error = xfs_dir2_leaf_to_block(args, lbp, NULL);
+       state->path.blk[0].bp = NULL;
+       return error;
+}
diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c
new file mode 100644 (file)
index 0000000..b6050ba
--- /dev/null
@@ -0,0 +1,1988 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_node.c
+ * XFS directory implementation, version 2, node form files
+ * See data structures in xfs_dir2_node.h and xfs_da_btree.h.
+ */
+
+#include <xfs.h>
+
+/*
+ * Log a run of bests[] slots of a freespace block.
+ * Slots first..last (inclusive) are turned into a first-byte/last-byte
+ * range relative to the start of the block and passed to xfs_da_log_buf().
+ */
+void
+xfs_dir2_free_log_bests(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp,            /* freespace buffer */
+       int                     first,          /* first entry to log */
+       int                     last)           /* last entry to log */
+{
+       xfs_dir2_free_t         *fsp = bp->data;        /* freespace structure */
+       uint                    lo;             /* offset of first logged byte */
+       uint                    hi;             /* offset of last logged byte */
+
+       ASSERT(INT_GET(fsp->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       lo = (uint)((char *)&fsp->bests[first] - (char *)fsp);
+       hi = (uint)((char *)&fsp->bests[last] - (char *)fsp +
+                   sizeof(fsp->bests[0]) - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the header of a freespace block.
+ * The logged range starts at the offset of hdr within the block and its
+ * last-byte argument is sizeof(xfs_dir2_free_hdr_t) - 1.
+ */
+static void
+xfs_dir2_free_log_header(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_dabuf_t             *bp)            /* freespace buffer */
+{
+       xfs_dir2_free_t         *fsp = bp->data;        /* freespace structure */
+       uint                    lo;             /* offset of first logged byte */
+       uint                    hi;             /* offset of last logged byte */
+
+       ASSERT(INT_GET(fsp->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       lo = (uint)((char *)&fsp->hdr - (char *)fsp);
+       hi = (uint)(sizeof(xfs_dir2_free_hdr_t) - 1);
+       xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Convert a leaf-format directory to a node-format directory.
+ * We need to change the magic number of the leaf block, and copy
+ * the freespace table out of the leaf block into its own block.
+ */
+int                                            /* error */
+xfs_dir2_leaf_to_node(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *lbp)           /* leaf buffer */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       xfs_dabuf_t             *fbp;           /* freespace buffer */
+       xfs_dir2_db_t           fdb;            /* freespace block number */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       xfs_dir2_data_off_t     *from;          /* pointer to freespace entry */
+       int                     i;              /* leaf freespace index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     n;              /* count of live freespc ents */
+       xfs_dir2_data_off_t     off;            /* freespace entry value */
+       xfs_dir2_data_off_t     *to;            /* pointer to freespace entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_b("leaf_to_node", args, lbp);
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Add a freespace block to the directory.
+        */
+       if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
+       /*
+        * Get the buffer for the new freespace block.
+        */
+       if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       ASSERT(fbp != NULL);
+       free = fbp->data;
+       leaf = lbp->data;
+       ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       /*
+        * Initialize the freespace block header.
+        */
+       INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+       INT_ZERO(free->hdr.firstdb, ARCH_CONVERT);
+       ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
+       INT_COPY(free->hdr.nvalid, ltp->bestcount, ARCH_CONVERT);
+       /*
+        * Copy freespace entries from the leaf block to the new block.
+        * Count active entries.
+        */
+       for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), to = free->bests;
+            i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
+               if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF)
+                       n++;
+               INT_SET(*to, ARCH_CONVERT, off);
+       }
+       INT_SET(free->hdr.nused, ARCH_CONVERT, n);
+       INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Log everything.
+        */
+       xfs_dir2_leaf_log_header(tp, lbp);
+       xfs_dir2_free_log_header(tp, fbp);
+       xfs_dir2_free_log_bests(tp, fbp, 0, INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1);
+       xfs_da_buf_done(fbp);
+       xfs_dir2_leafn_check(dp, lbp);
+       return 0;
+}
+
+/*
+ * Add a leaf entry to a leaf block in a node-form directory.
+ * The other work necessary is done from the caller.
+ *
+ * index is the slot at which the new entry is to go; the entry written
+ * carries args->hashval and a data pointer built from args->blkno and
+ * args->index.
+ * Returns ENOSPC (through XFS_ERROR) when the block already holds
+ * XFS_DIR2_MAX_LEAF_ENTS(mp) entries and none of them is stale, and 0
+ * otherwise.  If args->justcheck is set, 0 is returned right after that
+ * space check and before the block is modified.
+ */
+static int                                     /* error */
+xfs_dir2_leafn_add(
+       xfs_dabuf_t             *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     index)          /* insertion pt for new entry */
+{
+       int                     compact;        /* compacting stale leaves */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     highstale;      /* next stale entry */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     lfloghigh;      /* high leaf entry logging */
+       int                     lfloglow;       /* low leaf entry logging */
+       int                     lowstale;       /* previous stale entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_sb("leafn_add", args, index, bp);
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       leaf = bp->data;
+       /*
+        * If there are already the maximum number of leaf entries in
+        * the block, if there are no stale entries it won't fit.
+        * Caller will do a split.  If there are stale entries we'll do
+        * a compact.
+        * compact is only set when more than one entry is stale; with
+        * exactly one stale entry, or with room left in the block, it
+        * stays 0.
+        */
+       if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+#pragma mips_frequency_hint NEVER
+               if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT))
+                       return XFS_ERROR(ENOSPC);
+               compact = INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1;
+       } else
+               compact = 0;
+       ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval);
+       ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+              INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval);
+       
+       if (args->justcheck)
+               return 0;
+
+       /*
+        * Compact out all but one stale leaf entry.  Leaves behind
+        * the entry closest to index.
+        * index, lowstale, highstale and both logging bounds are passed
+        * by address and are used below as the compact routine left them.
+        */
+       if (compact) {
+#pragma mips_frequency_hint NEVER
+               xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale,
+                       &lfloglow, &lfloghigh);
+       }
+       /*
+        * Set impossible logging indices for this case.
+        * (low = count, high = -1, so that the MIN/MAX taken further down
+        * are decided by the entries actually touched.)
+        */
+       else if (!INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+               lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               lfloghigh = -1;
+       }
+       /*
+        * No stale entries, just insert a space for the new entry.
+        * Entries from index onward move up one slot and count grows
+        * by one; the logged range runs from index to the old count.
+        */
+       if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+               lep = &leaf->ents[index];
+               if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+                       ovbcopy(lep, lep + 1,
+                               (INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+               lfloglow = index;
+               lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+       }
+       /*
+        * There are stale entries.  We'll use one for the new entry.
+        * count is left unchanged on this path; stale drops by one.
+        */
+       else {
+               /*
+                * If we didn't do a compact then we need to figure out
+                * which stale entry will be used.
+                */
+               if (compact == 0) {
+                       /*
+                        * Find first stale entry before our insertion point.
+                        * lowstale ends up at -1 if there is none.
+                        */
+                       for (lowstale = index - 1;
+                            lowstale >= 0 &&
+                               INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+                               XFS_DIR2_NULL_DATAPTR;
+                            lowstale--)
+                               continue;
+                       /*
+                        * Find next stale entry after insertion point.
+                        * Stop looking if the answer would be worse than
+                        * lowstale already found.
+                        * highstale ends up equal to count if the scan
+                        * runs off the end of the entries.
+                        */
+                       for (highstale = index;
+                            highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+                               INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+                               XFS_DIR2_NULL_DATAPTR &&
+                               (lowstale < 0 ||
+                                index - lowstale - 1 >= highstale - index);
+                            highstale++)
+                               continue;
+               }
+               /*
+                * Using the low stale entry.
+                * Shift entries up toward the stale slot.
+                * Taken when a low stale entry exists and either highstale
+                * equals count or index - lowstale - 1 < highstale - index.
+                * The new entry lands in slot index - 1.
+                */
+               if (lowstale >= 0 &&
+                   (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+                    index - lowstale - 1 < highstale - index)) {
+                       ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+                              XFS_DIR2_NULL_DATAPTR);
+                       ASSERT(index - lowstale - 1 >= 0);
+                       if (index - lowstale - 1 > 0)
+                               ovbcopy(&leaf->ents[lowstale + 1],
+                                       &leaf->ents[lowstale],
+                                       (index - lowstale - 1) * sizeof(*lep));
+                       lep = &leaf->ents[index - 1];
+                       lfloglow = MIN(lowstale, lfloglow);
+                       lfloghigh = MAX(index - 1, lfloghigh);
+               }
+               /*
+                * Using the high stale entry.
+                * Shift entries down toward the stale slot.
+                * The new entry lands in slot index.
+                */
+               else {
+                       ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+                              XFS_DIR2_NULL_DATAPTR);
+                       ASSERT(highstale - index >= 0);
+                       if (highstale - index > 0)
+                               ovbcopy(&leaf->ents[index],
+                                       &leaf->ents[index + 1],
+                                       (highstale - index) * sizeof(*lep));
+                       lep = &leaf->ents[index];
+                       lfloglow = MIN(index, lfloglow);
+                       lfloghigh = MAX(highstale, lfloghigh);
+               }
+               INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+       }
+       /*
+        * Insert the new entry, log everything.
+        * The header is logged as well because count or stale was
+        * changed above.
+        */
+       INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
+       INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, args->blkno, args->index));
+       xfs_dir2_leaf_log_header(tp, bp);
+       xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
+       xfs_dir2_leafn_check(dp, bp);
+       return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Sanity-check a leafn block (DEBUG builds only).
+ * Asserts the magic number, that count does not exceed
+ * XFS_DIR2_MAX_LEAF_ENTS(mp), that hash values are in non-decreasing
+ * order, and that the header's stale count matches the number of entries
+ * whose address is XFS_DIR2_NULL_DATAPTR.
+ */
+void
+xfs_dir2_leafn_check(
+       xfs_inode_t     *dp,                    /* incore directory inode */
+       xfs_dabuf_t     *bp)                    /* leaf buffer */
+{
+       int             idx;                    /* leaf index */
+       xfs_dir2_leaf_t *leaf = bp->data;       /* leaf structure */
+       xfs_mount_t     *mp = dp->i_mount;      /* filesystem mount point */
+       int             nstale = 0;             /* count of stale leaves */
+
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
+       idx = 0;
+       while (idx < INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+               /*
+                * Every entry but the last is compared with its successor.
+                */
+               if (idx + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
+                       ASSERT(INT_GET(leaf->ents[idx].hashval, ARCH_CONVERT) <=
+                              INT_GET(leaf->ents[idx + 1].hashval, ARCH_CONVERT));
+               }
+               if (INT_GET(leaf->ents[idx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       nstale++;
+               idx++;
+       }
+       ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == nstale);
+}
+#endif /* DEBUG */
+
+/*
+ * Return the hash value of the last entry in the leaf; stale entries
+ * count like any other.  Returns 0 for a leaf with no entries.
+ * If count is non-NULL, the number of entries is stored through it.
+ */
+xfs_dahash_t                                   /* hash value */
+xfs_dir2_leafn_lasthash(
+       xfs_dabuf_t     *bp,                    /* leaf buffer */
+       int             *count)                 /* count of entries in leaf */
+{
+       xfs_dir2_leaf_t *leaf = bp->data;       /* leaf structure */
+       int             nents;                  /* entries in this leaf */
+
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       if (count)
+               *count = nents;
+       if (INT_ISZERO(leaf->hdr.count, ARCH_CONVERT))
+               return 0;
+       return INT_GET(leaf->ents[nents - 1].hashval, ARCH_CONVERT);
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ *
+ * Return values produced in this function (all through XFS_ERROR except
+ * the pass-through of a read error):
+ *   EEXIST       - (not addname) a data entry matching args->name was
+ *                  found; args->inumber is filled in and state->extrablk
+ *                  describes the data block, with index set to the byte
+ *                  offset of the entry within that block.
+ *   ENOENT       - (addname) a free block whose bests[] slot is at least
+ *                  the needed entry length was found and is handed back
+ *                  in state->extrablk; or, for any caller, the scan ended
+ *                  without a match.
+ *   EFSCORRUPTED - (addname) the bests[] slot for a data block that a
+ *                  live leaf entry points at is NULLDATAOFF.
+ *   other        - the error returned by xfs_da_read_buf().
+ * *indexp is written on the EEXIST and ENOENT returns only.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+       xfs_dabuf_t             *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     *indexp,        /* out: leaf entry index */
+       xfs_da_state_t          *state)         /* state to fill in */
+{
+       xfs_dabuf_t             *curbp;         /* current data/free buffer */
+       xfs_dir2_db_t           curdb;          /* current data block number */
+       xfs_dir2_db_t           curfdb;         /* current free block number */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       int                     fi;             /* free entry index */
+       xfs_dir2_free_t         *free;          /* free block structure */
+       int                     index;          /* leaf entry index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     length;         /* length of new data entry */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_db_t           newdb;          /* new data block number */
+       xfs_dir2_db_t           newfdb;         /* new free block number */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+       ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > 0);
+#endif
+       xfs_dir2_leafn_check(dp, bp);
+       /*
+        * Look up the hash value in the leaf entries.
+        */
+       index = xfs_dir2_leaf_search_hash(args, bp);
+       /*
+        * Do we have a buffer coming in?
+        */
+       if (state->extravalid)
+               curbp = state->extrablk.bp;
+       else
+               curbp = NULL;
+       /*
+        * For addname, it's a free block buffer, get the block number.
+        * curdb starts at -1 here, so the first live entry examined in
+        * the loop below always counts as a "new" data block.
+        */
+       if (args->addname) {
+               curfdb = curbp ? state->extrablk.blkno : -1;
+               curdb = -1;
+               length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+               if (free = (curbp ? curbp->data : NULL))
+                       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       }
+       /*
+        * For others, it's a data block buffer, get the block number.
+        */
+       else {
+               curfdb = -1;
+               curdb = curbp ? state->extrablk.blkno : -1;
+       }
+       /*
+        * Loop over leaf entries with the right hash value.
+        */
+       for (lep = &leaf->ents[index];
+            index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+            lep++, index++) {
+               /*
+                * Skip stale leaf entries.
+                */
+               if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Pull the data block number from the entry.
+                */
+               newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+               /*
+                * For addname, we're looking for a place to put the new entry.
+                * We want to use a data block with an entry of equal
+                * hash value to ours if there is one with room.
+                */
+               if (args->addname) {
+                       /*
+                        * If this block isn't the data block we already have
+                        * in hand, take a look at it.
+                        */
+                       if (newdb != curdb) {
+                               curdb = newdb;
+                               /*
+                                * Convert the data block to the free block
+                                * holding its freespace information.
+                                */
+                               newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+                               /*
+                                * If it's not the one we have in hand,
+                                * read it in.
+                                */
+                               if (newfdb != curfdb) {
+                                       /*
+                                        * If we had one before, drop it.
+                                        */
+                                       if (curbp)
+                                               xfs_da_brelse(tp, curbp);
+                                       /*
+                                        * Read the free block.
+                                        * NOTE(review): on failure we return
+                                        * with the old curbp already released
+                                        * above while state->extrablk.bp is
+                                        * not updated here - confirm callers
+                                        * cope with that.
+                                        */
+                                       if (error = xfs_da_read_buf(tp, dp,
+                                                       XFS_DIR2_DB_TO_DA(mp,
+                                                               newfdb),
+                                                       -1, &curbp,
+                                                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                                               return error;
+                                       }
+                                       curfdb = newfdb;
+                                       free = curbp->data;
+                                       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) ==
+                                              XFS_DIR2_FREE_MAGIC);
+                                       ASSERT((INT_GET(free->hdr.firstdb, ARCH_CONVERT) %
+                                               XFS_DIR2_MAX_FREE_BESTS(mp)) ==
+                                              0);
+                                       ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) <= curdb);
+                                       ASSERT(curdb <
+                                              INT_GET(free->hdr.firstdb, ARCH_CONVERT) +
+                                              INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+                               }
+                               /*
+                                * Get the index for our entry.
+                                */
+                               fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+                               /*
+                                * If it has room, return it.
+                                * A NULLDATAOFF slot for a data block that
+                                * a live leaf entry points at is reported
+                                * as corruption instead.
+                                */
+                               if (INT_GET(free->bests[fi], ARCH_CONVERT) == NULLDATAOFF) {
+#pragma mips_frequency_hint NEVER
+                                       return XFS_ERROR(EFSCORRUPTED);
+                               }
+                               if (INT_GET(free->bests[fi], ARCH_CONVERT) >= length) {
+                                       *indexp = index;
+                                       state->extravalid = 1;
+                                       state->extrablk.bp = curbp;
+                                       state->extrablk.blkno = curfdb;
+                                       state->extrablk.index = fi;
+                                       state->extrablk.magic =
+                                               XFS_DIR2_FREE_MAGIC;
+                                       ASSERT(args->oknoent);
+                                       return XFS_ERROR(ENOENT);
+                               }
+                       }
+               }
+               /*
+                * Not adding a new entry, so we really want to find
+                * the name given to us.
+                */
+               else {
+                       /*
+                        * If it's a different data block, go get it.
+                        */
+                       if (newdb != curdb) {
+                               /*
+                                * If we had a block before, drop it.
+                                */
+                               if (curbp)
+                                       xfs_da_brelse(tp, curbp);
+                               /*
+                                * Read the data block.
+                                */
+                               if (error =
+                                   xfs_da_read_buf(tp, dp,
+                                           XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+                                           &curbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                                       return error;
+                               }
+                               xfs_dir2_data_check(dp, curbp);
+                               curdb = newdb;
+                       }
+                       /*
+                        * Point to the data entry.
+                        * Its offset within the data block comes from the
+                        * leaf entry's address.
+                        */
+                       dep = (xfs_dir2_data_entry_t *)
+                             ((char *)curbp->data +
+                              XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+                       /*
+                        * Compare the entry, return it if it matches.
+                        * Length and first byte are tested before the
+                        * full bcmp() of the name.
+                        */
+                       if (dep->namelen == args->namelen &&
+                           dep->name[0] == args->name[0] &&
+                           bcmp(dep->name, args->name, args->namelen) == 0) {
+                               args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+                               *indexp = index;
+                               state->extravalid = 1;
+                               state->extrablk.bp = curbp;
+                               state->extrablk.blkno = curdb;
+                               state->extrablk.index =
+                                       (int)((char *)dep -
+                                             (char *)curbp->data);
+                               state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                               return XFS_ERROR(EEXIST);
+                       }
+               }
+       }
+       /*
+        * Didn't find a match.
+        * If we are holding a buffer, give it back in case our caller
+        * finds it useful.
+        * extravalid is set from whether a buffer is held; the assignment
+        * inside the condition is intentional.
+        */
+       if (state->extravalid = (curbp != NULL)) {
+               state->extrablk.bp = curbp;
+               state->extrablk.index = -1;
+               /*
+                * For addname, giving back a free block.
+                */
+               if (args->addname) {
+                       state->extrablk.blkno = curfdb;
+                       state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+               }
+               /*
+                * For other callers, giving back a data block.
+                */
+               else {
+                       state->extrablk.blkno = curdb;
+                       state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+               }
+       }
+       /*
+        * Return the final index, that will be the insertion point.
+        */
+       *indexp = index;
+       ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Move count leaf entries from source to destination leaf.
+ * Log entries and headers.  Stale entries are preserved.
+ */
+static void
+xfs_dir2_leafn_moveents(
+       xfs_da_args_t   *args,                  /* operation arguments */
+       xfs_dabuf_t     *bp_s,                  /* source leaf buffer */
+       int             start_s,                /* source leaf index */
+       xfs_dabuf_t     *bp_d,                  /* destination leaf buffer */
+       int             start_d,                /* destination leaf index */
+       int             count)                  /* count of leaves to copy */
+{
+       xfs_dir2_leaf_t *leaf_d;                /* destination leaf structure */
+       xfs_dir2_leaf_t *leaf_s;                /* source leaf structure */
+       int             stale;                  /* count stale leaves copied */
+       xfs_trans_t     *tp;                    /* transaction pointer */
+
+       xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d,
+               start_d, count);
+       /*
+        * Silently return if nothing to do.
+        */
+       if (count == 0) {
+#pragma mips_frequency_hint NEVER
+               return;
+       }
+       tp = args->trans;
+       leaf_s = bp_s->data;
+       leaf_d = bp_d->data;
+       /*
+        * If the destination index is not the end of the current
+        * destination leaf entries, open up a hole in the destination
+        * to hold the new entries.
+        */
+       if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
+               ovbcopy(&leaf_d->ents[start_d], &leaf_d->ents[start_d + count],
+                       (INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
+                       sizeof(xfs_dir2_leaf_entry_t));
+               xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
+                       count + INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - 1);
+       }
+       /*
+        * If the source has stale leaves, count the ones in the copy range
+        * so we can update the header correctly.
+        */
+       if (!INT_ISZERO(leaf_s->hdr.stale, ARCH_CONVERT)) {
+               int     i;                      /* temp leaf index */
+
+               for (i = start_s, stale = 0; i < start_s + count; i++) {
+                       if (INT_GET(leaf_s->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                               stale++;
+               }
+       } else
+               stale = 0;
+       /*
+        * Copy the leaf entries from source to destination.
+        */
+       bcopy(&leaf_s->ents[start_s], &leaf_d->ents[start_d],
+               count * sizeof(xfs_dir2_leaf_entry_t));
+       xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
+       /*
+        * If there are source entries after the ones we copied,
+        * delete the ones we copied by sliding the next ones down.
+        */
+       if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
+               ovbcopy(&leaf_s->ents[start_s + count], &leaf_s->ents[start_s],
+                       count * sizeof(xfs_dir2_leaf_entry_t));
+               xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
+       }
+       /*
+        * Update the headers and log them.
+        */
+       INT_MOD(leaf_s->hdr.count, ARCH_CONVERT, -(count));
+       INT_MOD(leaf_s->hdr.stale, ARCH_CONVERT, -(stale));
+       INT_MOD(leaf_d->hdr.count, ARCH_CONVERT, count);
+       INT_MOD(leaf_d->hdr.stale, ARCH_CONVERT, stale);
+       xfs_dir2_leaf_log_header(tp, bp_s);
+       xfs_dir2_leaf_log_header(tp, bp_d);
+       xfs_dir2_leafn_check(args->dp, bp_s);
+       xfs_dir2_leafn_check(args->dp, bp_d);
+}
+
+/*
+ * Decide whether two leaf blocks are out of hash order.
+ * Yields 1 only when both blocks hold entries and leaf2 sorts ahead of
+ * leaf1, judged by either the first or the last hash value of each
+ * block.  Yields 0 in every other case.
+ */
+int                                            /* sort order */
+xfs_dir2_leafn_order(
+       xfs_dabuf_t     *leaf1_bp,              /* leaf1 buffer */
+       xfs_dabuf_t     *leaf2_bp)              /* leaf2 buffer */
+{
+       xfs_dir2_leaf_t *first;                 /* leaf1 structure */
+       xfs_dir2_leaf_t *second;                /* leaf2 structure */
+       int             nfirst;                 /* entries in leaf1 */
+       int             nsecond;                /* entries in leaf2 */
+
+       first = leaf1_bp->data;
+       second = leaf2_bp->data;
+       ASSERT(INT_GET(first->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(INT_GET(second->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * An empty block cannot be ordered against anything.
+        */
+       nfirst = INT_GET(first->hdr.count, ARCH_CONVERT);
+       if (nfirst <= 0)
+               return 0;
+       nsecond = INT_GET(second->hdr.count, ARCH_CONVERT);
+       if (nsecond <= 0)
+               return 0;
+       /*
+        * Compare the lowest hash values, then the highest ones.
+        */
+       if (INT_GET(second->ents[0].hashval, ARCH_CONVERT) <
+           INT_GET(first->ents[0].hashval, ARCH_CONVERT))
+               return 1;
+       if (INT_GET(second->ents[nsecond - 1].hashval, ARCH_CONVERT) <
+           INT_GET(first->ents[nfirst - 1].hashval, ARCH_CONVERT))
+               return 1;
+       return 0;
+}
+
+/*
+ * Rebalance leaf entries between two leaf blocks.
+ * This is actually only called when the second block is new,
+ * though the code deals with the general case.
+ * A new entry will be inserted in one of the blocks, and that
+ * entry is taken into account when balancing.
+ */
+static void
+xfs_dir2_leafn_rebalance(
+       xfs_da_state_t          *state,         /* btree cursor */
+       xfs_da_state_blk_t      *blk1,          /* first btree block */
+       xfs_da_state_blk_t      *blk2)          /* second btree block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       int                     count;          /* count (& direction) leaves */
+       int                     isleft;         /* new goes in left leaf */
+       xfs_dir2_leaf_t         *leaf1;         /* first leaf structure */
+       xfs_dir2_leaf_t         *leaf2;         /* second leaf structure */
+       int                     mid;            /* midpoint leaf index */
+#ifdef DEBUG
+       int                     oldstale;       /* old count of stale leaves */
+#endif
+       int                     oldsum;         /* old total leaf count */
+       int                     swap;           /* swapped leaf blocks */
+
+       args = state->args;
+       /*
+        * If the block order is wrong, swap the arguments.
+        */
+       if (swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp)) {
+#pragma mips_frequency_hint NEVER
+               xfs_da_state_blk_t      *tmp;   /* temp for block swap */
+
+               tmp = blk1;
+               blk1 = blk2;
+               blk2 = tmp;
+       }
+       leaf1 = blk1->bp->data;
+       leaf2 = blk2->bp->data;
+       oldsum = INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+#ifdef DEBUG
+       oldstale = INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT);
+#endif
+       mid = oldsum >> 1;
+       /*
+        * If the old leaf count was odd then the new one will be even,
+        * so we need to divide the new count evenly.
+        */
+       if (oldsum & 1) {
+               xfs_dahash_t    midhash;        /* middle entry hash value */
+
+               if (mid >= INT_GET(leaf1->hdr.count, ARCH_CONVERT))
+                       midhash = INT_GET(leaf2->ents[mid - INT_GET(leaf1->hdr.count, ARCH_CONVERT)].hashval, ARCH_CONVERT);
+               else
+                       midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT);
+               isleft = args->hashval <= midhash;
+       }
+       /*
+        * If the old count is even then the new count is odd, so there's
+        * no preferred side for the new entry.
+        * Pick the left one.
+        */
+       else
+               isleft = 1;
+       /*
+        * Calculate moved entry count.  Positive means left-to-right,
+        * negative means right-to-left.  Then move the entries.
+        */
+       count = INT_GET(leaf1->hdr.count, ARCH_CONVERT) - mid + (isleft == 0);
+       if (count > 0)
+               xfs_dir2_leafn_moveents(args, blk1->bp,
+                       INT_GET(leaf1->hdr.count, ARCH_CONVERT) - count, blk2->bp, 0, count);
+       else if (count < 0)
+               xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp,
+                       INT_GET(leaf1->hdr.count, ARCH_CONVERT), count);
+       ASSERT(INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT) == oldsum);
+       ASSERT(INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT) == oldstale);
+       /*
+        * Mark whether we're inserting into the old or new leaf.
+        */
+       if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) < INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+               state->inleaf = swap;
+       else if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+               state->inleaf = !swap;
+       else
+               state->inleaf =
+                       swap ^ (args->hashval < INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT));
+       /*
+        * Adjust the expected index for insertion.
+        */
+       if (!state->inleaf)
+               blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+}
+
+/*
+ * Remove an entry from a node directory.
+ * This removes the leaf entry and the data entry,
+ * and updates the free block if necessary.
+ *
+ * The leaf entry at index is not physically deleted: its address is set
+ * to XFS_DIR2_NULL_DATAPTR and the leaf's stale count is bumped, while
+ * the leaf's entry count is left alone.
+ * dblk must describe the data block and offset the leaf entry points at
+ * (asserted below).  If the data block becomes empty and is successfully
+ * punched out, dblk->bp is set to NULL.
+ *
+ * Returns 0 on success, or the error from xfs_da_read_buf or
+ * xfs_dir2_shrink_inode.  On success *rval is non-zero when the bytes
+ * taken by the leaf header plus its live (non-stale) entries are below
+ * mp->m_dir_magicpct, i.e. the caller should consider a join.
+ */
+STATIC int                                     /* error */
+xfs_dir2_leafn_remove(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *bp,            /* leaf buffer */
+       int                     index,          /* leaf entry index */
+       xfs_da_state_blk_t      *dblk,          /* data block */
+       int                     *rval)          /* resulting block needs join */
+{
+       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_db_t           db;             /* data block number */
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     longest;        /* longest data free entry */
+       int                     off;            /* data block entry offset */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       xfs_dir2_trace_args_sb("leafn_remove", args, index, bp);
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Point to the entry we're removing.
+        */
+       lep = &leaf->ents[index];
+       /*
+        * Extract the data block and offset from the entry.
+        */
+       db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+       ASSERT(dblk->blkno == db);
+       off = XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT));
+       ASSERT(dblk->index == off);
+       /*
+        * Kill the leaf entry by marking it stale.
+        * Log the leaf block changes.
+        */
+       INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
+       xfs_dir2_leaf_log_header(tp, bp);
+       INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
+       xfs_dir2_leaf_log_ents(tp, bp, index, index);
+       /*
+        * Make the data entry free.  Keep track of the longest freespace
+        * in the data block in case it changes.
+        */
+       dbp = dblk->bp;
+       data = dbp->data;
+       dep = (xfs_dir2_data_entry_t *)((char *)data + off);
+       longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+       needlog = needscan = 0;
+       xfs_dir2_data_make_free(tp, dbp, off,
+               XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
+       /*
+        * Rescan the data block freespaces for bestfree.
+        * Log the data block header if needed.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       xfs_dir2_data_check(dp, dbp);
+       /*
+        * If the longest data block freespace changes, need to update
+        * the corresponding freeblock entry.
+        * Only growth is tested for: longest still holds the value sampled
+        * before the entry was freed.
+        */
+       if (longest < INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+               int             error;          /* error return value */
+               xfs_dabuf_t     *fbp;           /* freeblock buffer */
+               xfs_dir2_db_t   fdb;            /* freeblock block number */
+               int             findex;         /* index in freeblock entries */
+               xfs_dir2_free_t *free;          /* freeblock structure */
+               int             logfree;        /* need to log free entry */
+
+               /*
+                * Convert the data block number to a free block,
+                * read in the free block.
+                */
+               fdb = XFS_DIR2_DB_TO_FDB(mp, db);
+               if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb),
+                               -1, &fbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                       return error;
+               }
+               free = fbp->data;
+               ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+               ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) ==
+                      XFS_DIR2_MAX_FREE_BESTS(mp) *
+                      (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
+               /*
+                * Calculate which entry we need to fix.
+                */
+               findex = XFS_DIR2_DB_TO_FDINDEX(mp, db);
+               longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
+               /*
+                * If the data block is now empty we can get rid of it
+                * (usually).
+                * "Empty" means the longest free region spans everything
+                * in the block except the data header.
+                */
+               if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+#pragma mips_frequency_hint NEVER
+                       /*
+                        * Try to punch out the data block.
+                        */
+                       error = xfs_dir2_shrink_inode(args, db, dbp);
+                       if (error == 0) {
+                               dblk->bp = NULL;
+                               data = NULL;
+                       }
+                       /*
+                        * We can get ENOSPC if there's no space reservation.
+                        * In this case just drop the buffer and some one else
+                        * will eventually get rid of the empty block.
+                        * NOTE(review): dblk->bp still refers to dbp after
+                        * xfs_da_buf_done here - confirm callers cope.
+                        */
+                       else if (error == ENOSPC && args->total == 0)
+                               xfs_da_buf_done(dbp);
+                       else
+                               return error;
+               }
+               /*
+                * If we got rid of the data block, we can eliminate that entry
+                * in the free block.
+                * (data is NULL only when the shrink above succeeded.)
+                */
+               if (data == NULL) {
+#pragma mips_frequency_hint NEVER
+                       /*
+                        * One less used entry in the free table.
+                        */
+                       INT_MOD(free->hdr.nused, ARCH_CONVERT, -1);
+                       xfs_dir2_free_log_header(tp, fbp);
+                       /*
+                        * If this was the last entry in the table, we can
+                        * trim the table size back.  There might be other
+                        * entries at the end referring to non-existent
+                        * data blocks, get those too.
+                        */
+                       if (findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1) {
+                               int     i;              /* free entry index */
+
+                               /*
+                                * Walk back over trailing NULLDATAOFF slots;
+                                * i ends on the last slot still in use, or -1.
+                                */
+                               for (i = findex - 1;
+                                    i >= 0 && INT_GET(free->bests[i], ARCH_CONVERT) == NULLDATAOFF;
+                                    i--)
+                                       continue;
+                               INT_SET(free->hdr.nvalid, ARCH_CONVERT, i + 1);
+                               logfree = 0;
+                       }
+                       /*
+                        * Not the last entry, just punch it out.
+                        */
+                       else {
+                               INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+                               logfree = 1;
+                       }
+                       /*
+                        * If there are no useful entries left in the block,
+                        * get rid of the block if we can.
+                        */
+                       if (INT_GET(free->hdr.nused, ARCH_CONVERT) == 0) {
+                               error = xfs_dir2_shrink_inode(args, fdb, fbp);
+                               if (error == 0) {
+                                       fbp = NULL;
+                                       logfree = 0;
+                               } else if (error != ENOSPC || args->total != 0)
+                                       return error;
+                               /*
+                                * It's possible to get ENOSPC if there is no
+                                * space reservation.  In this case some one
+                                * else will eventually get rid of this block.
+                                */
+                       }
+               }
+               /*
+                * Data block is not empty, just set the free entry to
+                * the new value.
+                */
+               else {
+                       INT_SET(free->bests[findex], ARCH_CONVERT, longest);
+                       logfree = 1;
+               }
+               /*
+                * Log the free entry that changed, unless we got rid of it.
+                */
+               if (logfree)
+                       xfs_dir2_free_log_bests(tp, fbp, findex, findex);
+               /*
+                * Drop the buffer if we still have it.
+                */
+               if (fbp)
+                       xfs_da_buf_done(fbp);
+       }
+       xfs_dir2_leafn_check(dp, bp);
+       /*
+        * Return indication of whether this leaf block is empty enough
+        * to justify trying to join it with a neighbor.
+        * Only live entries (count minus stale) are charged.
+        */
+       *rval =
+               ((uint)sizeof(leaf->hdr) +
+                (uint)sizeof(leaf->ents[0]) *
+                (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT))) <
+               mp->m_dir_magicpct;
+       return 0;
+}
+
+/*
+ * Split the leaf entries in the old block into old and new blocks.
+ */
+int                                            /* error */
+xfs_dir2_leafn_split(
+       xfs_da_state_t          *state,         /* btree cursor */
+       xfs_da_state_blk_t      *oldblk,        /* original block */
+       xfs_da_state_blk_t      *newblk)        /* newly created block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_dablk_t             blkno;          /* new leaf block number */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       /*
+        * Allocate space for a new leaf node.
+        */
+       args = state->args;
+       mp = args->dp->i_mount;
+       ASSERT(args != NULL);
+       ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       /*
+        * Initialize the new leaf block.
+        */
+       error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno),
+               &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       newblk->blkno = blkno;
+       newblk->magic = XFS_DIR2_LEAFN_MAGIC;
+       /*
+        * Rebalance the entries across the two leaves, link the new
+        * block into the leaves.
+        */
+       xfs_dir2_leafn_rebalance(state, oldblk, newblk);
+       error = xfs_da_blk_link(state, oldblk, newblk);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       /*
+        * Insert the new entry in the correct block.
+        */
+       if (state->inleaf)
+               error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index);
+       else
+               error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index);
+       /*
+        * Update last hashval in each block since we added the name.
+        */
+       oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
+       xfs_dir2_leafn_check(args->dp, oldblk->bp);
+       xfs_dir2_leafn_check(args->dp, newblk->bp);
+       return error;
+}
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ *
+ * The block examined is the last active block of state->path.
+ * The function's return value is 0 or an error from xfs_da_read_buf or
+ * xfs_da_path_shift; the decision is reported through *action:
+ *   0 - nothing to do: the block is over 50% full, no sibling can absorb
+ *       it, or the path shift reported a non-zero result.
+ *   1 - the block can be collapsed with a sibling; state->path and
+ *       state->altpath have been set up for the join.
+ *   2 - the block is empty; state->altpath has been shifted to a sibling.
+ */
+int                                            /* error */
+xfs_dir2_leafn_toosmall(
+       xfs_da_state_t          *state,         /* btree cursor */
+       int                     *action)        /* resulting action to take */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block */
+       xfs_dablk_t             blkno;          /* leaf block number */
+       xfs_dabuf_t             *bp;            /* leaf buffer */
+       int                     bytes;          /* bytes in use */
+       int                     count;          /* leaf live entry count */
+       int                     error;          /* error return value */
+       int                     forward;        /* sibling block direction */
+       int                     i;              /* sibling counter */
+       xfs_da_blkinfo_t        *info;          /* leaf block header */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     rval;           /* result from path_shift */
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        * Stale entries are not counted as space in use.
+        */
+       blk = &state->path.blk[state->path.active - 1];
+       info = blk->bp->data;
+       ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       leaf = (xfs_dir2_leaf_t *)info;
+       count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+       bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
+       if (bytes > (state->blocksize >> 1)) {
+               /*
+                * Blk over 50%, don't try to join.
+                */
+               *action = 0;
+               return 0;
+       }
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (count == 0) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
+               bcopy(&state->path, &state->altpath, sizeof(state->path));
+               error = xfs_da_path_shift(state, &state->altpath, forward, 0,
+                       &rval);
+               if (error)
+                       return error;
+               *action = rval ? 2 : 0;
+               return 0;
+       }
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink a directory over time.
+        * The loop tries the preferred direction first and then flips
+        * forward for the second pass; a sibling number of 0 is skipped.
+        */
+       forward = INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT);
+       for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
+               blkno = forward ?INT_GET( info->forw, ARCH_CONVERT) : INT_GET(info->back, ARCH_CONVERT);
+               if (blkno == 0)
+                       continue;
+               /*
+                * Read the sibling leaf block.
+                */
+               if (error =
+                   xfs_da_read_buf(state->args->trans, state->args->dp, blkno,
+                           -1, &bp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                       return error;
+               }
+               ASSERT(bp != NULL);
+               /*
+                * Count bytes in the two blocks combined.
+                * bytes starts at 75% of the block size and the live
+                * entries of both blocks are subtracted from it; the
+                * leaf header size is not part of this estimate.
+                */
+               leaf = (xfs_dir2_leaf_t *)info;
+               count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+               bytes = state->blocksize - (state->blocksize >> 2);
+               leaf = bp->data;
+               ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+               count += INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT);
+               bytes -= count * (uint)sizeof(leaf->ents[0]);
+               /*
+                * Fits with at least 25% to spare.
+                * Leaving via break keeps bp held and forward/blkno
+                * describing the chosen sibling.
+                */
+               if (bytes >= 0)
+                       break;
+               xfs_da_brelse(state->args->trans, bp);
+       }
+       /*
+        * Didn't like either block, give up.
+        */
+       if (i >= 2) {
+               *action = 0;
+               return 0;
+       }
+       /*
+        * Done with the sibling leaf block here, drop the dabuf
+        * so path_shift can get it.
+        */
+       xfs_da_buf_done(bp);
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        */
+       bcopy(&state->path, &state->altpath, sizeof(state->path));
+       if (blkno < blk->blkno)
+               error = xfs_da_path_shift(state, &state->altpath, forward, 0,
+                       &rval);
+       else
+               error = xfs_da_path_shift(state, &state->path, forward, 0,
+                       &rval);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       *action = rval ? 0 : 1;
+       return 0;
+}
+
+/*
+ * Join helper: empty the leaf of drop_blk into the leaf of save_blk.
+ * Either leaf is compacted first if it carries stale entries, and the
+ * cached last hash value of both state blocks is refreshed.
+ */
+void
+xfs_dir2_leafn_unbalance(
+       xfs_da_state_t          *state,         /* cursor */
+       xfs_da_state_blk_t      *drop_blk,      /* dead block */
+       xfs_da_state_blk_t      *save_blk)      /* surviving block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_dir2_leaf_t         *dead;          /* leaf being emptied */
+       xfs_dir2_leaf_t         *keep;          /* leaf receiving entries */
+       int                     ndead;          /* entries in the dead leaf */
+       int                     where;          /* insertion index in keep */
+
+       ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       args = state->args;
+       dead = drop_blk->bp->data;
+       keep = save_blk->bp->data;
+       ASSERT(INT_GET(dead->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(INT_GET(keep->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Purge stale entries from both leaves while we are here.
+        */
+       if (INT_GET(dead->hdr.stale, ARCH_CONVERT) != 0)
+               xfs_dir2_leaf_compact(args, drop_blk->bp);
+       if (INT_GET(keep->hdr.stale, ARCH_CONVERT) != 0)
+               xfs_dir2_leaf_compact(args, save_blk->bp);
+       /*
+        * Remember the dead leaf's last hash before it is emptied.
+        */
+       ndead = INT_GET(dead->hdr.count, ARCH_CONVERT);
+       drop_blk->hashval = INT_GET(dead->ents[ndead - 1].hashval, ARCH_CONVERT);
+       /*
+        * The dead entries go in front of the survivor's entries when
+        * they sort first, otherwise they are appended at its end.
+        */
+       if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
+               where = 0;
+       else
+               where = INT_GET(keep->hdr.count, ARCH_CONVERT);
+       xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, where,
+               ndead);
+       save_blk->hashval = INT_GET(keep->ents[INT_GET(keep->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
+       xfs_dir2_leafn_check(args->dp, save_blk->bp);
+}
+
+/*
+ * Top-level node form directory addname routine.
+ */
+int                                            /* error */
+xfs_dir2_node_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block for insert */
+       int                     error;          /* error return value */
+       int                     rval;           /* result code returned to caller */
+       xfs_da_state_t          *state;         /* btree cursor */
+
+       xfs_dir2_trace_args("node_addname", args);
+       /*
+        * Allocate and initialize the state (btree cursor).
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_dirblksize;
+       /*
+        * Look up the name.  We're not supposed to find it, but
+        * this gives us the insertion point.
+        */
+       error = xfs_da_node_lookup_int(state, &rval);
+       if (error)
+               rval = error;
+       if (rval != ENOENT) {           /* name exists, or the lookup failed */
+#pragma mips_frequency_hint NEVER
+               goto done;
+       }
+       /*
+        * Add the data entry to a data block.
+        * If state->extravalid is set, extrablk is passed as the freespace block.
+        */
+       rval = xfs_dir2_node_addname_int(args,
+               state->extravalid ? &state->extrablk : NULL);
+       if (rval) {
+#pragma mips_frequency_hint NEVER
+               goto done;
+       }
+       blk = &state->path.blk[state->path.active - 1];
+       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Add the new leaf entry.
+        */
+       rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
+       if (rval == 0) {
+               /*
+                * It worked, fix the hash values up the btree.
+                */
+               if (!args->justcheck)
+                       xfs_da_fixhashpath(state, &state->path);
+       } else {
+#pragma mips_frequency_hint NEVER
+               /*
+                * It didn't work, we need to split the leaf block.
+                */
+               if (args->total == 0) {         /* give up: return rval unsplit */
+                       ASSERT(rval == ENOSPC);
+                       goto done;
+               }
+               /*
+                * Split the leaf block and insert the new entry.
+                */
+               rval = xfs_da_split(state);
+       }
+done:
+       xfs_da_state_free(state);
+       return rval;
+}
+
+
+/*
+ * Add the data entry for a node-format directory name addition.  The leaf
+ * entry is added in xfs_dir2_leafn_add.  We may enter with a freespace block
+ * that the lookup found.  On success args->blkno/index locate the new entry.
+ */
+STATIC int                                     /* error */
+xfs_dir2_node_addname_int(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_da_state_blk_t      *fblk)          /* optional freespace block */
+{
+       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_db_t           dbno;           /* data block number */
+       xfs_dabuf_t             *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* data unused entry pointer */
+       int                     error;          /* error return value */
+       xfs_dir2_db_t           fbno;           /* freespace block number */
+       xfs_dabuf_t             *fbp;           /* freespace buffer */
+       int                     findex;         /* freespace entry index */
+       xfs_dir2_db_t           foundbno;       /* found freespace block no */
+       int                     foundindex;     /* found freespace entry idx */
+       xfs_dir2_free_t         *free;          /* freespace block structure */
+       xfs_dir2_db_t           ifbno;          /* initial freespace block no */
+       xfs_dir2_db_t           lastfbno;       /* highest freespace block no */
+       int                     length;         /* length of the new entry */
+       int                     logfree;        /* need to log free entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       xfs_dir2_data_off_t     *tagp;          /* data entry tag pointer */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+       /*
+        * If we came in with a freespace block that means that lookup
+        * found an entry with our hash value.  This is the freespace
+        * block for that data entry.
+        */
+       if (fblk) {
+               fbp = fblk->bp;
+               /*
+                * Remember initial freespace block number.
+                */
+               ifbno = fblk->blkno;
+               free = fbp->data;
+               ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+               findex = fblk->index;
+               /*
+                * This means the free entry showed that the data block had
+                * space for our entry, so we remembered it.
+                * Use that data block.
+                */
+               if (findex >= 0) {
+                       ASSERT(findex < INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+                       ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF);
+                       ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) >= length);
+                       dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+               }
+               /*
+                * The data block looked at didn't have enough room.
+                * We'll start at the beginning of the freespace entries.
+                */
+               else {
+                       dbno = -1;
+                       findex = 0;
+               }
+       }
+       /*
+        * Didn't come in with a freespace block, so don't have a data block.
+        */
+       else {
+               ifbno = dbno = -1;
+               fbp = NULL;
+               findex = 0;
+       }
+       /*
+        * If we don't have a data block yet, we're going to scan the 
+        * freespace blocks looking for one.  Figure out what the
+        * highest freespace block number is.
+        */
+       if (dbno == -1) {
+               xfs_fileoff_t   fo;             /* freespace block number */
+
+               if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))
+                       return error;
+               lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo);
+               fbno = ifbno;
+               foundindex = -1;                /* no empty slot remembered yet */
+       }
+       /*
+        * While we haven't identified a data block, search the freeblock
+        * data for a good data block.  If we find a null freeblock entry,
+        * indicating a hole in the data blocks, remember that.
+        */
+       while (dbno == -1) {
+               /*
+                * If we don't have a freeblock in hand, get the next one.
+                */
+               if (fbp == NULL) {
+                       /*
+                        * With no lookup freeblock, fbno started at -1, so this
+                        * increment gives 0: start at the first freespace block.
+                        */
+                       if (++fbno == 0)
+                               fbno = XFS_DIR2_FREE_FIRSTDB(mp);
+                       /*
+                        * If it's ifbno we already looked at it.
+                        */
+                       if (fbno == ifbno)
+                               fbno++;
+                       /*
+                        * If it's off the end we're done.
+                        */
+                       if (fbno >= lastfbno)
+                               break;
+                       /*
+                        * Read the block.  There can be holes in the
+                        * freespace blocks, so this might not succeed.
+                        * This should be really rare, so there's no reason
+                        * to avoid it.
+                        */
+                       if (error = xfs_da_read_buf(tp, dp,
+                                       XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp,
+                                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                               return error;
+                       }
+                       if (fbp == NULL) {
+#pragma mips_frequency_hint NEVER
+                               continue;
+                       }
+                       free = fbp->data;
+                       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+                       findex = 0;
+               }
+               /*
+                * Look at the current free entry.  Is it good enough?
+                */
+               if (INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF &&
+                   INT_GET(free->bests[findex], ARCH_CONVERT) >= length)
+                       dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex;
+               else {
+                       /*
+                        * If we haven't found an empty entry yet, and this
+                        * one is empty, remember this slot.
+                        */
+                       if (foundindex == -1 &&
+                           INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) {
+                               foundindex = findex;
+                               foundbno = fbno;
+                       }
+                       /*
+                        * Are we done with the freeblock?
+                        */
+                       if (++findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+                               /*
+                                * If there is space left in this freeblock,
+                                * and we don't have an empty entry yet,
+                                * remember this slot.
+                                */
+                               if (foundindex == -1 &&
+                                   findex < XFS_DIR2_MAX_FREE_BESTS(mp)) {
+                                       foundindex = findex;
+                                       foundbno = fbno;
+                               }
+                               /*
+                                * Drop the block, and forget the caller's pointer.
+                                */
+                               xfs_da_brelse(tp, fbp);
+                               fbp = NULL;
+                               if (fblk && fblk->bp)
+                                       fblk->bp = NULL;
+                       }
+               }
+       }
+       /*
+        * If we don't have a data block, and there's no free slot in a
+        * freeblock, we need to add a new freeblock.
+        */
+       if (dbno == -1 && foundindex == -1) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Not allowed to allocate, so return failure.
+                */
+               if (args->justcheck || args->total == 0) {
+                       return XFS_ERROR(ENOSPC);
+               }
+               /*
+                * Add the new freeblock.
+                */
+               if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
+                               &fbno)) {
+                       return error;
+               }
+               /*
+                * Get a buffer for the new block.
+                */
+               if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno),
+                               -1, &fbp, XFS_DATA_FORK)) {
+                       return error;
+               }
+               ASSERT(fbp != NULL);
+               /*
+                * Initialize the new block to be empty, and remember
+                * its first slot as our empty slot.
+                */
+               free = fbp->data;
+               INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+               INT_SET(free->hdr.firstdb, ARCH_CONVERT, (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
+                       XFS_DIR2_MAX_FREE_BESTS(mp));
+                INT_ZERO(free->hdr.nused, ARCH_CONVERT);
+               INT_ZERO(free->hdr.nvalid, ARCH_CONVERT);
+               foundindex = 0;
+               foundbno = fbno;
+       }
+       /*
+        * If we don't have a data block, and we don't have a freeblock buffer
+        * in hand (we dropped the one with the free slot in it),
+        * go read the freeblock again.
+        */
+       if (dbno == -1 && fbp == NULL) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * We're going to use the empty slot we found before.
+                */
+               findex = foundindex;
+               fbno = foundbno;
+               if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno),
+                               -1, &fbp, XFS_DATA_FORK)) {
+                       return error;
+               }
+               ASSERT(fbp != NULL);
+               free = fbp->data;
+               ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       }
+       /*
+        * If we don't have a data block, we need to allocate one and make
+        * the freespace entries refer to it.
+        */
+       if (dbno == -1) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Not allowed to allocate, return failure.
+                */
+               if (args->justcheck || args->total == 0) {
+                       /*
+                        * Drop the freespace buffer unless it came from our
+                        * caller.
+                        */
+                       if (fblk == NULL || fblk->bp == NULL)
+                               xfs_da_buf_done(fbp);
+                       return XFS_ERROR(ENOSPC);
+               }
+               /*
+                * Allocate and initialize the new data block.
+                */
+               if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
+                               &dbno)) ||
+                   (error = xfs_dir2_data_init(args, dbno, &dbp))) {
+                       /*
+                        * Drop the freespace buffer unless it came from our
+                        * caller.
+                        */
+                       if (fblk == NULL || fblk->bp == NULL)
+                               xfs_da_buf_done(fbp);
+                       return error;
+               }
+               /*
+                * If the freespace entry for this data block is not in the
+                * freespace block we have in hand, drop the one we have
+                * and get the right one.
+                */
+               if (XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno) {
+                       xfs_da_brelse(tp, fbp);
+                       if (fblk && fblk->bp)
+                               fblk->bp = NULL;
+                       fbno = XFS_DIR2_DB_TO_FDB(mp, dbno);
+                       if (error = xfs_da_read_buf(tp, dp,
+                                       XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp,
+                                       XFS_DATA_FORK)) {
+                               xfs_da_buf_done(dbp);
+                               return error;
+                       }
+                       ASSERT(fbp != NULL);
+                       free = fbp->data;
+                       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+               }
+               /*
+                * Set the freespace block index from the data block number.
+                */
+               findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+               /*
+                * If it's after the end of the current entries in the
+                * freespace block, extend that table.
+                */
+               if (findex >= INT_GET(free->hdr.nvalid, ARCH_CONVERT)) {
+                       ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
+                       INT_SET(free->hdr.nvalid, ARCH_CONVERT, findex + 1);
+                       /*
+                        * Tag new entry so nused will go up.
+                        */
+                       INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF);
+               }
+               /*
+                * If this entry was for an empty data block
+                * (this should always be true) then update the header.
+                */
+               if (INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) {
+                       INT_MOD(free->hdr.nused, ARCH_CONVERT, +1);
+                       xfs_dir2_free_log_header(tp, fbp);
+               }
+               /*
+                * Update the real value in the table.
+                * We haven't allocated the data entry yet so this will
+                * change again.
+                */
+               data = dbp->data;
+               INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); 
+               logfree = 1;
+       }
+       /*
+        * We had a data block so we don't have to make a new one.
+        */
+       else {
+               /*
+                * If just checking, we succeeded.
+                */
+               if (args->justcheck) {
+                       if (fblk == NULL || fblk->bp == NULL)
+                               xfs_da_buf_done(fbp);
+                       return 0;
+               }
+               /*
+                * Read the data block in.
+                */
+               if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno),
+                               -1, &dbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+                       if (fblk == NULL || fblk->bp == NULL)
+                               xfs_da_buf_done(fbp);
+                       return error;
+               }
+               data = dbp->data;
+               logfree = 0;
+       }
+       ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) >= length);
+       /*
+        * Point to the existing unused space.
+        */
+       dup = (xfs_dir2_data_unused_t *)
+             ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
+       needscan = needlog = 0;
+       /*
+        * Mark the first part of the unused space, inuse for us.
+        */
+       xfs_dir2_data_use_free(tp, dbp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+               &needlog, &needscan);
+       /*
+        * Fill in the new entry and log it.
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
+       dep->namelen = args->namelen;
+       bcopy(args->name, dep->name, dep->namelen);
+       tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+       INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
+       xfs_dir2_data_log_entry(tp, dbp, dep);
+       /*
+        * Rescan the block for bestfree if needed.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(mp, data, &needlog, NULL);
+       /*
+        * Log the data block header if needed.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(tp, dbp);
+       /*
+        * If the freespace entry is now wrong, update it.
+        */
+       if (INT_GET(free->bests[findex], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
+               INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT);
+               logfree = 1;
+       }
+       /*
+        * Log the freespace entry if needed.
+        */
+       if (logfree)
+               xfs_dir2_free_log_bests(tp, fbp, findex, findex);
+       /*
+        * If the caller didn't hand us the freespace block, drop it.
+        */
+       if (fblk == NULL || fblk->bp == NULL)
+               xfs_da_buf_done(fbp);
+       /*
+        * Return the data block and offset in args, then drop the data block.
+        */
+       args->blkno = (xfs_dablk_t)dbno;
+       args->index = INT_GET(*tagp, ARCH_CONVERT);
+       xfs_da_buf_done(dbp);
+       return 0;
+}
+
+/*
+ * Look up an entry in a node-format directory.  All the real work happens in
+ * xfs_da_node_lookup_int.  The only real output is the inode number of the
+ * entry; the return value is that lookup's result, or its error if it failed.
+ */
+int                                            /* error */
+xfs_dir2_node_lookup(
+       xfs_da_args_t   *args)                  /* operation arguments */
+{
+       int             error;                  /* error return value */
+       int             i;                      /* btree level */
+       int             rval;                   /* operation return value */
+       xfs_da_state_t  *state;                 /* btree cursor */
+
+       xfs_dir2_trace_args("node_lookup", args);
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_dirblksize;
+       /*
+        * Fill in the path to the entry in the cursor.
+        */
+       error = xfs_da_node_lookup_int(state, &rval);
+       if (error)
+               rval = error;                   /* a failure overrides the result */
+       /*
+        * Release the btree blocks and leaf block.
+        */
+       for (i = 0; i < state->path.active; i++) {
+               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+       /*
+        * Release the data block if we have it.
+        */
+       if (state->extravalid && state->extrablk.bp) {
+               xfs_da_brelse(args->trans, state->extrablk.bp);
+               state->extrablk.bp = NULL;
+       }
+       xfs_da_state_free(state);
+       return rval;
+}
+
+/*
+ * Remove an entry from a node-format directory.
+ */
+int                                            /* error */
+xfs_dir2_node_removename(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block */
+       int                     error;          /* error return value */
+       int                     rval;           /* operation return value */
+       xfs_da_state_t          *state;         /* btree cursor */
+
+       xfs_dir2_trace_args("node_removename", args);
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_dirblksize;
+       /*
+        * Look up the entry we're deleting, set up the cursor.
+        */
+       error = xfs_da_node_lookup_int(state, &rval);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               rval = error;
+       }
+       /*
+        * Didn't find it, upper layer screwed up.
+        */
+       if (rval != EEXIST) {
+#pragma mips_frequency_hint NEVER
+               xfs_da_state_free(state);
+               return rval;
+       }
+       blk = &state->path.blk[state->path.active - 1];
+       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(state->extravalid);
+       /*
+        * Remove the leaf and data entries.
+        * Extrablk refers to the data block.
+        */
+       error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
+               &state->extrablk, &rval);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       /*
+        * Fix the hash values up the btree.
+        */
+       xfs_da_fixhashpath(state, &state->path);
+       /*
+        * If we need to join leaf blocks, do it.
+        */
+       if (rval && state->path.active > 1)
+               error = xfs_da_join(state);
+       /*
+        * If no errors so far, try conversion to leaf format.
+        */
+       if (!error)
+               error = xfs_dir2_node_to_leaf(state);
+       xfs_da_state_free(state);
+       return error;
+}
+
+/*
+ * Set an existing node-format directory entry's inode number to args->inumber.
+ */
+int                                            /* error */
+xfs_dir2_node_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block */
+       xfs_dir2_data_t         *data;          /* data block structure */
+       xfs_dir2_data_entry_t   *dep;           /* data entry changed */
+       int                     error;          /* error return value */
+       int                     i;              /* btree level */
+       xfs_ino_t               inum;           /* new inode number */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry being changed */
+       int                     rval;           /* internal return value */
+       xfs_da_state_t          *state;         /* btree cursor */
+
+       xfs_dir2_trace_args("node_replace", args);
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       state->blocksize = state->mp->m_dirblksize;
+       inum = args->inumber;
+       /*
+        * Lookup the entry to change in the btree.
+        */
+       error = xfs_da_node_lookup_int(state, &rval);
+       if (error) {
+#pragma mips_frequency_hint NEVER
+               rval = error;
+       }
+       /*
+        * It should be found, since the vnodeops layer has looked it up
+        * and locked it.  But paranoia is good.
+        */
+       if (rval == EEXIST) {
+               /*
+                * Find the leaf entry.
+                */
+               blk = &state->path.blk[state->path.active - 1];
+               ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+               leaf = blk->bp->data;
+               lep = &leaf->ents[blk->index];
+               ASSERT(state->extravalid);
+               /*
+                * Point to the data entry.
+                */
+               data = state->extrablk.bp->data;
+               ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+               dep = (xfs_dir2_data_entry_t *)
+                     ((char *)data +
+                      XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT)));
+               ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT));
+               /*
+                * Fill in the new inode number and log the entry.
+                */
+               INT_SET(dep->inumber, ARCH_CONVERT, inum);
+               xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
+               rval = 0;                       /* EEXIST here means success */
+       }
+       /*
+        * Didn't find it, and we're holding a data block.  Drop it.
+        */
+       else if (state->extravalid) {
+#pragma mips_frequency_hint NEVER
+               xfs_da_brelse(args->trans, state->extrablk.bp);
+               state->extrablk.bp = NULL;
+       }
+       /*
+        * Release all the buffers in the cursor.
+        */
+       for (i = 0; i < state->path.active; i++) {
+               xfs_da_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+       xfs_da_state_free(state);
+       return rval;
+}
+
+/*
+ * Trim off a trailing empty freespace block.
+ * Return (in rvalp) 1 if we did it, 0 if not; *rvalp is not set on error.
+ */
+int                                            /* error */
+xfs_dir2_node_trim_free(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_fileoff_t           fo,             /* free block number */
+       int                     *rvalp)         /* out: did something */
+{
+       xfs_dabuf_t             *bp;            /* freespace buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Read the freespace block.
+        */
+       if (error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -1, &bp,
+                       XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+               return error;
+       }
+       free = bp->data;
+       ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+       /*
+        * If there are used entries, there's nothing to do.
+        */
+       if (INT_GET(free->hdr.nused, ARCH_CONVERT) > 0) {
+               xfs_da_brelse(tp, bp);
+               *rvalp = 0;
+               return 0;
+       }
+       /*
+        * Blow the block away.
+        */
+       if (error =
+           xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo),
+                   bp)) {
+               /*
+                * Can't fail with ENOSPC since that only happens with no
+                * space reservation, when breaking up an extent into two
+                * pieces.  This is the last block of an extent.
+                */
+               ASSERT(error != ENOSPC);
+               xfs_da_brelse(tp, bp);
+               return error;
+       }
+       /*
+        * Return that we succeeded.
+        */
+       *rvalp = 1;
+       return 0;
+}
diff --git a/libxfs/xfs_dir2_sf.c b/libxfs/xfs_dir2_sf.c
new file mode 100644 (file)
index 0000000..a021822
--- /dev/null
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_sf.c
+ * Shortform directory implementation for v2 directories.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Given a block directory (dp/block), calculate its size as a shortform (sf)
+ * directory and a header for the sf directory, if it will fit it the
+ * space currently present in the inode.  If it won't fit, the output
+ * size is too big (but not accurate).
+ */
+int                                            /* size for sf form */
+xfs_dir2_block_sfsize(
+       xfs_inode_t             *dp,            /* incore inode pointer */
+       xfs_dir2_block_t        *block,         /* block directory data */
+       xfs_dir2_sf_hdr_t       *sfhp)          /* output: header for sf form */
+{
+       xfs_dir2_dataptr_t      addr;           /* data entry address */
+       xfs_dir2_leaf_entry_t   *blp;           /* leaf area of the block */
+       xfs_dir2_block_tail_t   *btp;           /* tail area of the block */
+       int                     count;          /* shortform entry count */
+       xfs_dir2_data_entry_t   *dep;           /* data entry in the block */
+       int                     i;              /* block entry index */
+       int                     i8count;        /* count of big-inode entries */
+       int                     isdot;          /* entry is "." */
+       int                     isdotdot;       /* entry is ".." */
+       xfs_mount_t             *mp;            /* mount structure pointer */
+       int                     namelen;        /* total name bytes */
+       xfs_ino_t               parent;         /* parent inode number */
+       int                     size;           /* total computed size */
+
+       mp = dp->i_mount;
+
+       count = i8count = namelen = 0;
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+        
+       /*
+        * Iterate over the block's data entries by using the leaf pointers.
+        */
+       for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+               if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Calculate the pointer to the entry at hand.
+                */
+               dep = (xfs_dir2_data_entry_t *)
+                     ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr));
+               /*
+                * Detect . and .., so we can special-case them.
+                * . is not included in sf directories.
+                * .. is included by just the parent inode number.
+                */
+               isdot = dep->namelen == 1 && dep->name[0] == '.';
+               isdotdot =
+                       dep->namelen == 2 &&
+                       dep->name[0] == '.' && dep->name[1] == '.';
+#if XFS_BIG_FILESYSTEMS
+               if (!isdot)
+                       i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM;
+#endif
+               if (!isdot && !isdotdot) {
+                       count++;
+                       namelen += dep->namelen;
+               } else if (isdotdot)
+                       parent = INT_GET(dep->inumber, ARCH_CONVERT);
+               /*
+                * Calculate the new size, see if we should give up yet.
+                */
+               size = XFS_DIR2_SF_HDR_SIZE(i8count) +          /* header */
+                      count +                                  /* namelen */
+                      count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
+                      namelen +                                /* name */
+                      (i8count ?                               /* inumber */
+                               (uint)sizeof(xfs_dir2_ino8_t) * count :
+                               (uint)sizeof(xfs_dir2_ino4_t) * count);
+               if (size > XFS_IFORK_DSIZE(dp))
+                       return size;            /* size value is a failure */
+       }
+       /*
+        * Create the output header, if it worked.
+        */
+       sfhp->count = count;
+       sfhp->i8count = i8count;
+       XFS_DIR2_SF_PUT_INUMBER_ARCH((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent, ARCH_CONVERT);
+       return size;
+}
+
+/*
+ * Convert a block format directory to shortform.
+ * Caller has already checked that it will fit, and built us a header.
+ *
+ * The block contents are copied to a temporary buffer first, because the
+ * directory block is given up (xfs_dir2_shrink_inode) before the inode's
+ * local data area is rebuilt from that copy.  Both the success path and
+ * the shrink-failure path end at "out", which logs the inode with the
+ * flags accumulated so far and frees the temporary copy.
+ *
+ * Returns 0 on success, or the error from xfs_dir2_shrink_inode.
+ */
+int                                            /* error */
+xfs_dir2_block_to_sf(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dabuf_t             *bp,            /* block buffer */
+       int                     size,           /* shortform directory size */
+       xfs_dir2_sf_hdr_t       *sfhp)          /* shortform directory hdr */
+{
+       xfs_dir2_block_t        *block;         /* block structure */
+       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused data pointer */
+       char                    *endptr;        /* end of data entries */
+       int                     error;          /* error return value */
+       int                     logflags;       /* inode logging flags */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       char                    *ptr;           /* current data pointer */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+        xfs_ino_t               temp;          /* entry inode number, passed by address */
+                
+       xfs_dir2_trace_args_sb("block_to_sf", args, size, bp);
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       /*
+        * Make a copy of the block data, so we can shrink the inode
+        * and add local data.
+        */
+       block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+       bcopy(bp->data, block, mp->m_dirblksize);
+       logflags = XFS_ILOG_CORE;
+       if (error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(error != ENOSPC);
+               goto out;
+       }
+       /*
+        * The buffer is now unconditionally gone, whether
+        * xfs_dir2_shrink_inode worked or not.
+        *
+        * Convert the inode to local format.
+        */
+       dp->i_df.if_flags &= ~XFS_IFEXTENTS;
+       dp->i_df.if_flags |= XFS_IFINLINE;
+       dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+       ASSERT(dp->i_df.if_bytes == 0);
+       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+       logflags |= XFS_ILOG_DDATA;
+       /*
+        * Copy the header into the newly allocated local space.
+        */
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       bcopy(sfhp, sfp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count));
+       dp->i_d.di_size = size;
+       /*
+        * Set up to loop over the block's entries.
+        * All pointers below refer to the private copy, not to bp.
+        */
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       ptr = (char *)block->u;
+       endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+       /*
+        * Loop over the active and unused entries.
+        * Stop when we reach the leaf/tail portion of the block.
+        */
+       while (ptr < endptr) {
+               /*
+                * If it's unused, just skip over it.
+                */
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               /*
+                * Skip ".": nothing is copied for it, we only check that
+                * it refers to this directory's own inode.
+                */
+               if (dep->namelen == 1 && dep->name[0] == '.')
+                       ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino);
+               /*
+                * Skip .., but make sure the inode number is right, i.e.
+                * that it matches the parent already stored in the header.
+                */
+               else if (dep->namelen == 2 &&
+                        dep->name[0] == '.' && dep->name[1] == '.')
+                       ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) ==
+                              XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT));
+               /*
+                * Normal entry, copy it into shortform.
+                * The stored offset is the entry's byte offset within
+                * the block.
+                */
+               else {
+                       sfep->namelen = dep->namelen;
+                       XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,
+                               (xfs_dir2_data_aoff_t)
+                               ((char *)dep - (char *)block), ARCH_CONVERT);
+                       bcopy(dep->name, sfep->name, dep->namelen);
+                        temp=INT_GET(dep->inumber, ARCH_CONVERT);
+                       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &temp,
+                               XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+                       sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+               }
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+       }
+       ASSERT((char *)sfep - (char *)sfp == size);
+       xfs_dir2_sf_check(args);
+out:
+       xfs_trans_log_inode(args->trans, dp, logflags);
+       kmem_free(block, mp->m_dirblksize);
+       return error;
+}
+
+/*
+ * Add a name to a shortform directory.
+ * There are two algorithms, "easy" and "hard", which we decide on
+ * before changing anything.
+ * Convert to block form if necessary, if the new entry won't fit.
+ *
+ * Returns 0 on success (or, with args->justcheck set, when the entry
+ * would fit), EIO if the inode data is too small to hold the start of a
+ * shortform header, ENOSPC if the entry does not fit and the caller is
+ * only checking or has no space reservation (args->total == 0), and
+ * otherwise whatever the conversion to block form and the block-form
+ * add return.
+ */
+int                                            /* error */
+xfs_dir2_sf_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       int                     add_entsize;    /* size of the new entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       int                     incr_isize;     /* total change in size */
+       int                     new_isize;      /* di_size after adding name */
+       int                     objchange;      /* changing to 8-byte inodes */
+       xfs_dir2_data_aoff_t    offset;         /* offset for new entry */
+       int                     old_isize;      /* di_size before adding name */
+       int                     pick;           /* which algorithm to use */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
+
+       xfs_dir2_trace_args("sf_addname", args);
+       ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
+       dp = args->dp;
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Make sure the shortform value has some of its header.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+       /*
+        * Compute entry (and change in) size.
+        */
+       add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+       incr_isize = add_entsize;
+#if XFS_BIG_FILESYSTEMS
+       /*
+        * Do we have to change to 8 byte inodes?
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Yes, adjust the entry size and the total size.
+                * The total grows by the 8-vs-4 byte difference for
+                * hdr.count + 2 inode numbers (presumably the existing
+                * entries, the new entry and the parent - confirm
+                * against the sf header layout).
+                */
+               add_entsize +=
+                       (uint)sizeof(xfs_dir2_ino8_t) -
+                       (uint)sizeof(xfs_dir2_ino4_t);
+               incr_isize +=
+                       (sfp->hdr.count + 2) *
+                       ((uint)sizeof(xfs_dir2_ino8_t) -
+                        (uint)sizeof(xfs_dir2_ino4_t));
+               objchange = 1;
+       } else
+               objchange = 0;
+#else
+       objchange = 0;
+#endif
+       old_isize = (int)dp->i_d.di_size;
+       new_isize = old_isize + incr_isize;
+       /*
+        * Won't fit as shortform any more (due to size),
+        * or the pick routine says it won't (due to offset values).
+        * The pick routine is only called when the size test passes, so
+        * pick, sfep and offset are not set when the size test fails;
+        * that path always returns from inside this branch.
+        */
+       if (new_isize > XFS_IFORK_DSIZE(dp) ||
+           (pick =
+            xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * Just checking or no space reservation, it doesn't fit.
+                */
+               if (args->justcheck || args->total == 0)
+                       return XFS_ERROR(ENOSPC);
+               /*
+                * Convert to block form then add the name.
+                */
+               error = xfs_dir2_sf_to_block(args);
+               if (error)
+                       return error;
+               return xfs_dir2_block_addname(args);
+       }
+       /*
+        * Just checking, it fits.
+        */
+       if (args->justcheck)
+               return 0;
+       /*
+        * Do it the easy way - just add it at the end.
+        */
+       if (pick == 1)
+               xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
+       /*
+        * Do it the hard way - look for a place to insert the new entry.
+        * Convert to 8 byte inode numbers first if necessary.
+        */
+       else {
+               ASSERT(pick == 2);
+#if XFS_BIG_FILESYSTEMS
+               if (objchange)
+                       xfs_dir2_sf_toino8(args);
+#endif
+               xfs_dir2_sf_addname_hard(args, objchange, new_isize);
+       }
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Add the new entry the "easy" way.
+ * This is copying the old directory and adding the new entry at the end.
+ * Since it's sorted by "offset" we need room after the last offset
+ * that's already there, and then room to convert to a block directory.
+ * This is already checked by the pick routine.
+ *
+ * sfep is a pointer into the inode's current data area.  It is turned
+ * into a byte offset before the area is reallocated and rebuilt from
+ * that offset afterwards.
+ */
+STATIC void
+xfs_dir2_sf_addname_easy(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dir2_sf_entry_t     *sfep,          /* pointer to new entry */
+       xfs_dir2_data_aoff_t    offset,         /* offset to use for new ent */
+       int                     new_isize)      /* new directory size */
+{
+       int                     byteoff;        /* byte offset in sf dir */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+
+       dp = args->dp;
+
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       byteoff = (int)((char *)sfep - (char *)sfp);
+       /*
+        * Grow the in-inode space.
+        * NOTE(review): the size argument looks like a byte delta (the
+        * size of the one new entry), not a new total - confirm against
+        * xfs_idata_realloc.
+        */
+       xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen),
+               XFS_DATA_FORK);
+       /*
+        * Need to set up again due to realloc of the inode data.
+        */
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
+       /*
+        * Fill in the new entry.
+        */
+       sfep->namelen = args->namelen;
+       XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
+       bcopy(args->name, sfep->name, sfep->namelen);
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+               XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+       /*
+        * Update the header and inode.
+        */
+       sfp->hdr.count++;
+#if XFS_BIG_FILESYSTEMS
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
+               sfp->hdr.i8count++;
+#endif
+       dp->i_d.di_size = new_isize;
+       xfs_dir2_sf_check(args);
+}
+
+/*
+ * Add the new entry the "hard" way.
+ * The caller has already converted to 8 byte inode numbers if necessary,
+ * in which case we need to leave the i8count at 1.
+ * Find a hole that the new entry will fit into, and copy
+ * the first part of the entries, the new entry, and the last part of
+ * the entries.
+ *
+ * The old directory is saved in an on-stack buffer of
+ * XFS_DIR2_SF_MAX_SIZE bytes, and the inode's data area is then freed
+ * and reallocated at new_isize before being rebuilt from that copy.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_dir2_sf_addname_hard(
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     objchange,      /* changing inode number size */
+       int                     new_isize)      /* new directory size */
+{
+       int                     add_datasize;   /* data size need for new ent */
+       char                    buf[XFS_DIR2_SF_MAX_SIZE]; /* buffer for old */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     eof;            /* reached end of old dir */
+       int                     nbytes;         /* temp for byte copies */
+       xfs_dir2_data_aoff_t    new_offset;     /* next offset value */
+       xfs_dir2_data_aoff_t    offset;         /* current offset value */
+       int                     old_isize;      /* previous di_size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* entry in original dir */
+       xfs_dir2_sf_t           *oldsfp;        /* original shortform dir */
+       xfs_dir2_sf_entry_t     *sfep;          /* entry in new dir */
+       xfs_dir2_sf_t           *sfp;           /* new shortform dir */
+
+       /*
+        * Copy the old directory to the stack buffer.
+        */
+       dp = args->dp;
+
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       old_isize = (int)dp->i_d.di_size;
+       oldsfp = (xfs_dir2_sf_t *)buf;
+       bcopy(sfp, oldsfp, old_isize);
+       /*
+        * Loop over the old directory finding the place we're going
+        * to insert the new entry.
+        * If it's going to end up at the end then oldsfep will point there.
+        * "offset" tracks the first data offset past the previous entry;
+        * the loop breaks at the first entry whose stored offset leaves
+        * a gap of at least add_datasize bytes before it.
+        */
+       for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
+             oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp),
+             add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen),
+             eof = (char *)oldsfep == &buf[old_isize];
+            !eof;
+            offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen),
+             oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep),
+             eof = (char *)oldsfep == &buf[old_isize]) {
+               new_offset = XFS_DIR2_SF_GET_OFFSET_ARCH(oldsfep, ARCH_CONVERT);
+               if (offset + add_datasize <= new_offset)
+                       break;
+       }
+       /*
+        * Get rid of the old directory, then allocate space for
+        * the new one.  We do this so xfs_idata_realloc won't copy
+        * the data.
+        */
+       xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
+       /*
+        * Reset the pointer since the buffer was reallocated.
+        */
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Copy the first part of the directory, including the header.
+        */
+       nbytes = (int)((char *)oldsfep - (char *)oldsfp);
+       bcopy(oldsfp, sfp, nbytes);
+       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
+       /*
+        * Fill in the new entry, and update the header counts.
+        * i8count is only bumped here when the caller did not just
+        * convert the directory to 8 byte inode numbers (objchange).
+        */
+       sfep->namelen = args->namelen;
+       XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT);
+       bcopy(args->name, sfep->name, sfep->namelen);
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+               XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+       sfp->hdr.count++;
+#if XFS_BIG_FILESYSTEMS
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
+               sfp->hdr.i8count++;
+#endif
+       /*
+        * If there's more left to copy, do that.
+        * The remaining old entries go directly after the new entry.
+        */
+       if (!eof) {
+               sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+               bcopy(oldsfep, sfep, old_isize - nbytes);
+       }
+       dp->i_d.di_size = new_isize;
+       xfs_dir2_sf_check(args);
+}
+
+/*
+ * Decide if the new entry will fit at all.
+ * If it will fit, pick between adding the new entry to the end (easy)
+ * or somewhere else (hard).
+ * Return 0 (won't fit), 1 (easy), 2 (hard).
+ *
+ * *sfepp and *offsetp are written only when 1 is returned; for the
+ * other return values they are left untouched.
+ */
+/*ARGSUSED*/
+STATIC int                                     /* pick result */
+xfs_dir2_sf_addname_pick(
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     objchange,      /* inode # size changes */
+       xfs_dir2_sf_entry_t     **sfepp,        /* out(1): new entry ptr */
+       xfs_dir2_data_aoff_t    *offsetp)       /* out(1): new offset */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     holefit;        /* found hole it will fit in */
+       int                     i;              /* entry number */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_data_aoff_t    offset;         /* data block offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+       int                     size;           /* entry's data size */
+       int                     used;           /* data bytes used */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       size = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+       offset = XFS_DIR2_DATA_FIRST_OFFSET;
+       sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+       holefit = 0;
+       /*
+        * Loop over sf entries.
+        * Keep track of data offset and whether we've seen a place
+        * to insert the new entry.
+        * On exit, offset is just past the last entry's data and sfep
+        * points just past the last shortform entry.
+        */
+       for (i = 0; i < sfp->hdr.count; i++) {
+               if (!holefit)
+                       holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT);
+               offset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) +
+                        XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+               sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+       }
+       /*
+        * Calculate data bytes used excluding the new entry, if this
+        * was a data block (block form directory).
+        * NOTE(review): the "+ 3" leaf slots presumably cover ".", ".."
+        * and the new entry - confirm against the block format.
+        */
+       used = offset +
+              (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t);
+       /*
+        * If it won't fit in a block form then we can't insert it,
+        * we'll go back, convert to block, then try the insert and convert
+        * to leaf.
+        */
+       if (used + (holefit ? 0 : size) > mp->m_dirblksize)
+               return 0;
+       /*
+        * If changing the inode number size, do it the hard way.
+        */
+#if XFS_BIG_FILESYSTEMS
+       if (objchange) {
+#pragma mips_frequency_hint NEVER
+               return 2;
+       }
+#else
+       ASSERT(objchange == 0);
+#endif
+       /*
+        * If it won't fit at the end then do it the hard way (use the hole).
+        */
+       if (used + size > mp->m_dirblksize)
+               return 2;
+       /*
+        * Do it the easy way.
+        */
+       *sfepp = sfep;
+       *offsetp = offset;
+       return 1;
+}
+
+#ifdef DEBUG
+/*
+ * Check consistency of shortform directory, assert if bad.
+ *
+ * Checks made: entry offsets never go backwards past the end of the
+ * previous entry's data, the number of inode numbers above
+ * XFS_DIR2_MAX_SHORT_INUM (parent included) equals hdr.i8count, the
+ * entries end exactly at di_size, and the data plus leaf entries and
+ * block tail would fit within m_dirblksize.
+ * Only compiled when DEBUG is defined.
+ */
+STATIC void
+xfs_dir2_sf_check(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry number */
+       int                     i8count;        /* number of big inode#s */
+       xfs_ino_t               ino;            /* entry inode number */
+       int                     offset;         /* data offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform dir entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+
+       dp = args->dp;
+
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       offset = XFS_DIR2_DATA_FIRST_OFFSET;
+       ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;        /* parent counts too */
+        
+       for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+            i < sfp->hdr.count;
+            i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+               ASSERT(XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) >= offset);
+               ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+               offset =
+                       XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) +
+                       XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+       }
+       ASSERT(i8count == sfp->hdr.i8count);
+#if !XFS_BIG_FILESYSTEMS
+       ASSERT(i8count == 0);
+#endif
+       ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
+       ASSERT(offset +
+              (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t) <=
+              dp->i_mount->m_dirblksize);
+}
+#endif /* DEBUG */
+
+/*
+ * Create a new (shortform) directory.
+ *
+ * The directory inode must currently be empty (di_size == 0).  The
+ * result is a shortform directory consisting of just the header, which
+ * records the parent inode number and an entry count of zero.
+ * Always returns 0.
+ */
+int                                    /* error, always 0 */
+xfs_dir2_sf_create(
+       xfs_da_args_t   *args,          /* operation arguments */
+       xfs_ino_t       pino)           /* parent inode number */
+{
+       xfs_inode_t     *dp;            /* incore directory inode */
+       int             i8count;        /* parent inode is an 8-byte number */
+       xfs_dir2_sf_t   *sfp;           /* shortform structure */
+       int             size;           /* directory size */
+
+       xfs_dir2_trace_args_i("sf_create", args, pino);
+       dp = args->dp;
+
+       ASSERT(dp != NULL);
+       ASSERT(dp->i_d.di_size == 0);
+       /*
+        * If it's currently a zero-length extent file,
+        * convert it to local format.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+               dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
+               dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+               dp->i_df.if_flags |= XFS_IFINLINE;
+       }
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       ASSERT(dp->i_df.if_bytes == 0);
+       i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
+       size = XFS_DIR2_SF_HDR_SIZE(i8count);
+       /*
+        * Make a buffer for the data.
+        */
+       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+       /*
+        * Fill in the header.  i8count has to be stored first.
+        */
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       sfp->hdr.i8count = i8count;
+       /*
+        * Now can put in the inode number, since i8count is set.
+        */
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &pino, &sfp->hdr.parent, ARCH_CONVERT);
+       sfp->hdr.count = 0;
+       dp->i_d.di_size = size;
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Lookup an entry in a shortform directory.
+ * Returns EEXIST if found, ENOENT if not found.
+ *
+ * When the name is found, args->inumber is set to the inode number of
+ * the entry ("." gives the directory's own inode, ".." gives the parent
+ * stored in the header).  Returns EIO if the inode data is too small to
+ * hold the start of a shortform header.
+ */
+int                                            /* error */
+xfs_dir2_sf_lookup(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+
+       xfs_dir2_trace_args("sf_lookup", args);
+       xfs_dir2_sf_check(args);
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Bail out if the directory is way too short.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+       /*
+        * Special case for .
+        * It has no shortform entry; the answer is this inode itself.
+        */
+       if (args->namelen == 1 && args->name[0] == '.') {
+               args->inumber = dp->i_ino;
+               return XFS_ERROR(EEXIST);
+       }
+       /*
+        * Special case for ..
+        * The parent inode number is kept in the shortform header.
+        */
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+               args->inumber = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+               return XFS_ERROR(EEXIST);
+       }
+       /*
+        * Loop over all the entries trying to match ours.
+        * Length and first character are compared before the full
+        * byte comparison.
+        */
+       for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+            i < sfp->hdr.count;
+            i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+               if (sfep->namelen == args->namelen &&
+                   sfep->name[0] == args->name[0] &&
+                   bcmp(args->name, sfep->name, args->namelen) == 0) {
+                       args->inumber =
+                               XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+                                       XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+                       return XFS_ERROR(EEXIST);
+               }
+       }
+       /*
+        * Didn't find it.
+        */
+       ASSERT(args->oknoent);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Remove an entry from a shortform directory.
+ *
+ * Finds the entry whose name matches args->name/args->namelen in the
+ * inode's inline data, slides any following bytes down over it, and
+ * shrinks the data fork and di_size by the size of that entry.
+ * Returns 0 on success, EIO if di_size is smaller than the offset of the
+ * parent field in the shortform header, or ENOENT if no entry matches.
+ */
+int                                            /* error */
+xfs_dir2_sf_removename(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       int                     byteoff;        /* offset of removed entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     entsize;        /* this entry's size */
+       int                     i;              /* shortform entry index */
+       int                     newsize;        /* new inode size */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+
+       xfs_dir2_trace_args("sf_removename", args);
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       oldsize = (int)dp->i_d.di_size;
+       /*
+        * Bail out if the directory is way too short.
+        * The ASSERT records that this is only expected after a forced
+        * shutdown of the filesystem.
+        */
+       if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == oldsize);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+       /*
+        * Loop over the old directory entries.
+        * Find the one we're deleting.
+        * The cheap length and first-character tests come before the full
+        * bcmp; on a match the stored inode number is asserted to equal
+        * args->inumber.
+        */
+       for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+            i < sfp->hdr.count;
+            i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+               if (sfep->namelen == args->namelen &&
+                   sfep->name[0] == args->name[0] &&
+                   bcmp(sfep->name, args->name, args->namelen) == 0) {
+                       ASSERT(XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+                                       XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT) ==
+                               args->inumber);
+                       break;
+               }
+       }
+       /*
+        * Didn't find it.
+        * The loop ran to completion without hitting the break.
+        */
+       if (i == sfp->hdr.count) {
+#pragma mips_frequency_hint NEVER
+               return XFS_ERROR(ENOENT);
+       }
+       /*
+        * Calculate sizes.
+        * byteoff is the matched entry's offset from the start of the
+        * shortform data; newsize is the directory size without it.
+        */
+       byteoff = (int)((char *)sfep - (char *)sfp);
+       entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen);
+       newsize = oldsize - entsize;
+       /*
+        * Copy the part if any after the removed entry, sliding it down.
+        * Source and destination lie in the same buffer and can overlap.
+        */
+       if (byteoff + entsize < oldsize)
+               ovbcopy((char *)sfp + byteoff + entsize, (char *)sfp + byteoff,
+                       oldsize - (byteoff + entsize));
+       /*
+        * Fix up the header and file size.
+        */
+       sfp->hdr.count--;
+       dp->i_d.di_size = newsize;
+       /*
+        * Reallocate, making it smaller.
+        * sfp is re-read from the fork afterwards rather than reused.
+        */
+       xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+#if XFS_BIG_FILESYSTEMS
+       /*
+        * Are we changing inode number size?
+        * The removed entry's inode number was too big for the short form.
+        * If it was the only such number (i8count == 1) convert the whole
+        * directory to 4-byte numbers, otherwise just drop the count.
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
+#pragma mips_frequency_hint NEVER
+               if (sfp->hdr.i8count == 1)
+                       xfs_dir2_sf_toino4(args);
+               else
+                       sfp->hdr.i8count--;
+       }
+#endif
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Replace the inode number of an entry in a shortform directory.
+ */
+int                                            /* error */
+xfs_dir2_sf_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+       xfs_ino_t               ino;            /* entry old inode number */
+#endif
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_t           *sfp;           /* shortform structure */
+
+       xfs_dir2_trace_args("sf_replace", args);
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Bail out if the shortform directory is way too small.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count));
+#if XFS_BIG_FILESYSTEMS
+       /*
+        * New inode number is large, and need to convert to 8-byte inodes.
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+#pragma mips_frequency_hint NEVER
+               int     error;                  /* error return value */
+               int     newsize;                /* new inode size */
+
+               newsize =
+                       dp->i_df.if_bytes +
+                       (sfp->hdr.count + 1) *
+                       ((uint)sizeof(xfs_dir2_ino8_t) -
+                        (uint)sizeof(xfs_dir2_ino4_t));
+               /*
+                * Won't fit as shortform, convert to block then do replace.
+                */
+               if (newsize > XFS_IFORK_DSIZE(dp)) {
+                       error = xfs_dir2_sf_to_block(args);
+                       if (error) {
+                               return error;
+                       }
+                       return xfs_dir2_block_replace(args);
+               }
+               /*
+                * Still fits, convert to 8-byte now.
+                */
+               xfs_dir2_sf_toino8(args);
+               sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       }
+#endif
+       ASSERT(args->namelen != 1 || args->name[0] != '.');
+       /*
+        * Replace ..'s entry.
+        */
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+               ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+               ASSERT(args->inumber != ino);
+#endif
+               XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, &sfp->hdr.parent, ARCH_CONVERT);
+       }
+       /*
+        * Normal entry, look for the name.
+        */
+       else {
+               for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+                    i < sfp->hdr.count; 
+                    i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) {
+                       if (sfep->namelen == args->namelen &&
+                           sfep->name[0] == args->name[0] &&
+                           bcmp(args->name, sfep->name, args->namelen) == 0) {
+#if XFS_BIG_FILESYSTEMS || defined(DEBUG)
+                               ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp,
+                                       XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+                               ASSERT(args->inumber != ino);
+#endif
+                               XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber,
+                                       XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+                               break;
+                       }
+               }
+               /*
+                * Didn't find it.
+                */
+               if (i == sfp->hdr.count) {
+#pragma mips_frequency_hint NEVER
+                       ASSERT(args->oknoent);
+                       return XFS_ERROR(ENOENT);
+               }
+       }
+#if XFS_BIG_FILESYSTEMS
+       /*
+        * See if the old number was large, the new number is small.
+        */
+       if (ino > XFS_DIR2_MAX_SHORT_INUM &&
+           args->inumber <= XFS_DIR2_MAX_SHORT_INUM) {
+#pragma mips_frequency_hint NEVER
+               /*
+                * And the old count was one, so need to convert to small.
+                */
+               if (sfp->hdr.i8count == 1)
+                       xfs_dir2_sf_toino4(args);
+               else
+                       sfp->hdr.i8count--;
+       }
+#endif
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+       return 0;
+}
+
+#if XFS_BIG_FILESYSTEMS
+/*
+ * Convert from 8-byte inode numbers to 4-byte inode numbers.
+ * The last 8-byte inode number is gone, but the count is still 1.
+ *
+ * The current shortform data is copied to a temporary buffer, the data
+ * fork is shrunk to nothing and regrown at the smaller size, and the
+ * header and every entry are rewritten into it with i8count set to 0.
+ * di_size is then updated and the inode core and data are logged.
+ */
+STATIC void
+xfs_dir2_sf_toino4(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       char                    *buf;           /* old dir's buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       xfs_ino_t               ino;            /* entry inode number */
+       int                     newsize;        /* new inode size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
+       xfs_dir2_sf_t           *oldsfp;        /* old sf directory */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
+       xfs_dir2_sf_t           *sfp;           /* new sf directory */
+
+       xfs_dir2_trace_args("sf_toino4", args);
+       dp = args->dp;
+
+       /*
+        * Copy the old directory to the buffer.
+        * Then nuke it from the inode, and add the new buffer to the inode.
+        * Don't want xfs_idata_realloc copying the data here.
+        */
+       oldsize = dp->i_df.if_bytes;
+       buf = kmem_alloc(oldsize, KM_SLEEP);
+       oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->hdr.i8count == 1);
+       bcopy(oldsfp, buf, oldsize);
+       /*
+        * Compute the new inode size.
+        * Each entry, plus the parent in the header, shrinks by the
+        * difference between an 8-byte and a 4-byte inode number.
+        */
+       newsize =
+               oldsize -
+               (oldsfp->hdr.count + 1) *
+               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
+       /*
+        * Reset our pointers, the data has moved.
+        * From here on the old directory is the copy in buf and the new
+        * directory is the freshly sized data fork.
+        */
+       oldsfp = (xfs_dir2_sf_t *)buf;
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Fill in the new header.
+        * i8count must be set before the parent is stored, since the
+        * inode number accessors are handed sfp.
+        * NOTE(review): presumably they use sfp's i8count to pick the
+        * stored width - confirm against the macro definitions.
+        */
+       sfp->hdr.count = oldsfp->hdr.count;
+       sfp->hdr.i8count = 0;
+       ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT);
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT);
+       /*
+        * Copy the entries field by field.
+        * The old and new lists are walked in step, each with its own
+        * directory header passed to the entry macros.
+        */
+       for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+                   oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+            i < sfp->hdr.count;
+            i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+                 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+               sfep->namelen = oldsfep->namelen;
+               sfep->offset = oldsfep->offset;
+               bcopy(oldsfep->name, sfep->name, sfep->namelen);
+               ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
+                       XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
+               XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+       }
+       /*
+        * Clean up the inode.
+        * Free the temporary copy, record the new size and log the change.
+        */
+       kmem_free(buf, oldsize);
+       dp->i_d.di_size = newsize;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+
+/*
+ * Convert from 4-byte inode numbers to 8-byte inode numbers.
+ * The new 8-byte inode number is not there yet, we leave with the
+ * count 1 but no corresponding entry.
+ *
+ * The current shortform data is copied to a temporary buffer, the data
+ * fork is shrunk to nothing and regrown at the larger size, and the
+ * header and every entry are rewritten into it with i8count set to 1.
+ * di_size is then updated and the inode core and data are logged.
+ * Nothing here checks that the larger directory still fits in the inode.
+ */
+STATIC void
+xfs_dir2_sf_toino8(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       char                    *buf;           /* old dir's buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       xfs_ino_t               ino;            /* entry inode number */
+       int                     newsize;        /* new inode size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
+       xfs_dir2_sf_t           *oldsfp;        /* old sf directory */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
+       xfs_dir2_sf_t           *sfp;           /* new sf directory */
+
+       xfs_dir2_trace_args("sf_toino8", args);
+       dp = args->dp;
+
+       /*
+        * Copy the old directory to the buffer.
+        * Then nuke it from the inode, and add the new buffer to the inode.
+        * Don't want xfs_idata_realloc copying the data here.
+        */
+       oldsize = dp->i_df.if_bytes;
+       buf = kmem_alloc(oldsize, KM_SLEEP);
+       oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->hdr.i8count == 0);
+       bcopy(oldsfp, buf, oldsize);
+       /*
+        * Compute the new inode size.
+        * Each entry, plus the parent in the header, grows by the
+        * difference between an 8-byte and a 4-byte inode number.
+        */
+       newsize =
+               oldsize +
+               (oldsfp->hdr.count + 1) *
+               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
+       /*
+        * Reset our pointers, the data has moved.
+        * From here on the old directory is the copy in buf and the new
+        * directory is the freshly sized data fork.
+        */
+       oldsfp = (xfs_dir2_sf_t *)buf;
+       sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Fill in the new header.
+        * i8count must be set before the parent is stored, since the
+        * inode number accessors are handed sfp.
+        * NOTE(review): presumably they use sfp's i8count to pick the
+        * stored width - confirm against the macro definitions.
+        */
+       sfp->hdr.count = oldsfp->hdr.count;
+       sfp->hdr.i8count = 1;
+       ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT);
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT);
+       /*
+        * Copy the entries field by field.
+        * The old and new lists are walked in step, each with its own
+        * directory header passed to the entry macros.
+        */
+       for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp),
+                   oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp);
+            i < sfp->hdr.count;
+            i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep),
+                 oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) {
+               sfep->namelen = oldsfep->namelen;
+               sfep->offset = oldsfep->offset;
+               bcopy(oldsfep->name, sfep->name, sfep->namelen);
+               ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp,
+                       XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT);
+               XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+       }
+       /*
+        * Clean up the inode.
+        * Free the temporary copy, record the new size and log the change.
+        */
+       kmem_free(buf, oldsize);
+       dp->i_d.di_size = newsize;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+#endif /* XFS_BIG_FILESYSTEMS */
diff --git a/libxfs/xfs_dir_leaf.c b/libxfs/xfs_dir_leaf.c
new file mode 100644 (file)
index 0000000..40c1214
--- /dev/null
@@ -0,0 +1,1695 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_dir_leaf.c
+ *
+ * Routines to implement leaf blocks of directories as Btrees of hashed names.
+ */
+
+/*
+ * Check that an inode number is plausible for this filesystem.
+ *
+ * The number is split into its allocation group, block within the group
+ * and offset within the block.  Each piece must be in range (and the
+ * block must not be zero), and reassembling the pieces must give back the
+ * original number.  Returns 0 if the number passes, otherwise logs a
+ * warning and returns EFSCORRUPTED.
+ */
+int
+xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
+{
+       xfs_agnumber_t  ag;             /* allocation group number */
+       xfs_agblock_t   agbno;          /* block within the group */
+       int             offset;         /* inode slot within the block */
+       xfs_agino_t     agino;          /* group-relative inode number */
+       int             bad;            /* nonzero when a check fails */
+
+       ag = XFS_INO_TO_AGNO(mp, ino);
+       agbno = XFS_INO_TO_AGBNO(mp, ino);
+       offset = XFS_INO_TO_OFFSET(mp, ino);
+       agino = XFS_OFFBNO_TO_AGINO(mp, agbno, offset);
+
+       bad = ag >= mp->m_sb.sb_agcount ||
+             agbno == 0 ||
+             agbno >= mp->m_sb.sb_agblocks ||
+             offset >= (1 << mp->m_sb.sb_inopblog) ||
+             XFS_AGINO_TO_INO(mp, ag, agino) != ino;
+
+       if (!XFS_TEST_ERROR(bad, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
+                       XFS_RANDOM_DIR_INO_VALIDATE))
+               return 0;
+
+       xfs_fs_cmn_err(CE_WARN, mp,
+               "Invalid inode number 0x%Lx\n", ino);
+       return XFS_ERROR(EFSCORRUPTED);
+}
+
+/*
+ * Create the initial contents of a shortform directory.
+ *
+ * The directory inode (args->dp) is asserted to have size 0.  If it is in
+ * extents format it is switched to local format and marked inline first.
+ * The data fork is then grown to hold just a shortform header, whose
+ * parent field is set to "parent" and whose entry count is zeroed, and
+ * the inode core and data are logged.  Always returns 0.
+ */
+int
+xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
+{
+       xfs_dir_sf_hdr_t *hdr;          /* header inside the inline data */
+       xfs_inode_t *dp;                /* incore directory inode */
+
+       dp = args->dp;
+       ASSERT(dp != NULL);
+       ASSERT(dp->i_d.di_size == 0);
+       if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+               dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
+               dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+               dp->i_df.if_flags |= XFS_IFINLINE;
+       }
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       ASSERT(dp->i_df.if_bytes == 0);
+       xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK);     /* room for the header only */
+       hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       XFS_DIR_SF_PUT_DIRINO_ARCH(&parent, &hdr->parent, ARCH_CONVERT);
+
+       INT_ZERO(hdr->count, ARCH_CONVERT);     /* no entries yet */
+       dp->i_d.di_size = sizeof(*hdr);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return(0);
+}
+
+/*
+ * Add a name to the shortform directory structure.
+ * The caller has already made sure the new entry fits in the inode.
+ *
+ * Returns EEXIST if the name is already present, EIO if the inline data
+ * is smaller than a shortform header, and otherwise 0 after appending the
+ * entry, bumping the entry count and logging the inode.
+ */
+int
+xfs_dir_shortform_addname(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp = args->dp;
+       xfs_dir_shortform_t *sf;
+       xfs_dir_sf_entry_t *slot;
+       int nentries, idx;
+       int tailoff;            /* byte offset just past the last entry */
+       int entsize;            /* bytes needed by the new entry */
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * A directory smaller than its own header is what is left when a
+        * shortform to leaf conversion fails part way through.
+        */
+       if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+
+       /*
+        * Walk every entry, rejecting a duplicate name.  When the walk
+        * finishes, "slot" points just past the last existing entry.
+        */
+       nentries = INT_GET(sf->hdr.count, ARCH_CONVERT);
+       for (idx = 0, slot = &sf->list[0];
+            idx < nentries;
+            idx++, slot = XFS_DIR_SF_NEXTENTRY(slot)) {
+               if (slot->namelen != args->namelen)
+                       continue;
+               if (args->name[0] != slot->name[0])
+                       continue;
+               if (bcmp(args->name, slot->name, args->namelen) == 0)
+                       return XFS_ERROR(EEXIST);
+       }
+
+       /*
+        * Grow the fork by one entry.  The pointers are rebuilt from the
+        * saved offset afterwards instead of being reused.
+        */
+       tailoff = (int)((char *)slot - (char *)sf);
+       entsize = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
+       xfs_idata_realloc(dp, entsize, XFS_DATA_FORK);
+       sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+       slot = (xfs_dir_sf_entry_t *)((char *)sf + tailoff);
+
+       /*
+        * Fill in the new entry and account for it.
+        */
+       XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &slot->inumber, ARCH_CONVERT);
+       slot->namelen = args->namelen;
+       bcopy(args->name, slot->name, slot->namelen);
+       INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
+
+       dp->i_d.di_size += entsize;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+       return 0;
+}
+
+/*
+ * Remove a name from the shortform directory structure.
+ *
+ * Finds the entry matching args->name/args->namelen, slides any entries
+ * after it down over it, decrements the entry count and shrinks the data
+ * fork and di_size by the size of the removed entry.  Returns 0 on
+ * success, EIO if the inline data is smaller than a shortform header, or
+ * ENOENT if no entry has that name.
+ */
+int
+xfs_dir_shortform_removename(xfs_da_args_t *args)
+{
+       xfs_dir_shortform_t *sf;        /* shortform directory in the inode */
+       xfs_dir_sf_entry_t *sfe;        /* entry being examined */
+       int base, size, i;      /* entry byte offset, entry size, countdown */
+       xfs_inode_t *dp;                /* incore directory inode */
+
+       dp = args->dp;
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Catch the case where the conversion from shortform to leaf
+        * failed part way through.
+        */
+       if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       base = sizeof(xfs_dir_sf_hdr_t);        /* entries start after the header */
+       sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+       sfe = &sf->list[0];
+       for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+               size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
+               if (sfe->namelen == args->namelen &&
+                   sfe->name[0] == args->name[0] &&
+                   bcmp(sfe->name, args->name, args->namelen) == 0)
+                       break;  /* base and size now describe the match */
+               base += size;
+               sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+       }
+       if (i < 0) {    /* loop ran out without a match */
+               ASSERT(args->oknoent);
+               return(XFS_ERROR(ENOENT));
+       }
+
+       if ((base + size) != dp->i_d.di_size) { /* not the last entry */
+               ovbcopy(&((char *)sf)[base+size], &((char *)sf)[base],
+                                             dp->i_d.di_size - (base+size));
+       }
+       INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+
+       xfs_idata_realloc(dp, -size, XFS_DATA_FORK);    /* give back the freed bytes */
+       dp->i_d.di_size -= size;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+       return(0);
+}
+
+/*
+ * Look up a name in a shortform directory structure.
+ *
+ * On a hit the inode number is stored in args->inumber and EEXIST is
+ * returned.  "." is answered with the directory's own inode number and
+ * ".." with the parent field of the header.  ENOENT means no entry has
+ * the name; EIO means the inline data is smaller than a shortform header.
+ */
+int
+xfs_dir_shortform_lookup(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp = args->dp;
+       xfs_dir_shortform_t *sf;
+       xfs_dir_sf_entry_t *ent;
+       int nleft;              /* entries still to be examined */
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * A directory smaller than its own header is what is left when a
+        * shortform to leaf conversion fails part way through.
+        */
+       if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+
+       /*
+        * Neither "." nor ".." is stored as an entry, answer them here.
+        */
+       if (args->namelen == 1 && args->name[0] == '.') {
+               args->inumber = dp->i_ino;
+               return XFS_ERROR(EEXIST);
+       }
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+               XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &args->inumber, ARCH_CONVERT);
+               return XFS_ERROR(EEXIST);
+       }
+
+       /*
+        * Scan the stored entries for the name.
+        */
+       ent = &sf->list[0];
+       nleft = INT_GET(sf->hdr.count, ARCH_CONVERT);
+       while (nleft-- > 0) {
+               if (ent->namelen == args->namelen &&
+                   ent->name[0] == args->name[0] &&
+                   bcmp(args->name, ent->name, args->namelen) == 0) {
+                       XFS_DIR_SF_GET_DIRINO_ARCH(&ent->inumber, &args->inumber, ARCH_CONVERT);
+                       return XFS_ERROR(EEXIST);
+               }
+               ent = XFS_DIR_SF_NEXTENTRY(ent);
+       }
+
+       ASSERT(args->oknoent);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Convert from using the shortform to the leaf.
+ *
+ * The inline shortform data is copied to a temporary buffer and removed
+ * from the inode (di_size becomes 0).  The inode is then grown by one
+ * directory block, a leaf is created in it, and ".", ".." and every
+ * saved shortform entry are added to the leaf in turn.  Returns 0 on
+ * success, EIO if the inline data is smaller than a shortform header, or
+ * the first error from growing the inode, creating the leaf or adding a
+ * name.  The temporary buffer is freed on every path that allocated it.
+ */
+int
+xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
+{
+       xfs_inode_t *dp;                /* incore directory inode */
+       xfs_dir_shortform_t *sf;        /* saved copy of the shortform data */
+       xfs_dir_sf_entry_t *sfe;        /* entry being converted */
+       xfs_da_args_t args;             /* arguments for each leaf addname */
+       xfs_ino_t inumber;              /* parent inode number from the header */
+       char *tmpbuffer;                /* holds the saved shortform data */
+       int retval, i, size;
+       xfs_dablk_t blkno;              /* block number of the new leaf */
+       xfs_dabuf_t *bp;                /* buffer for the new leaf */
+
+       dp = iargs->dp;
+       /*
+        * Catch the case where the conversion from shortform to leaf
+        * failed part way through.
+        */
+       if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       size = dp->i_df.if_bytes;
+       tmpbuffer = kmem_alloc(size, KM_SLEEP);
+       ASSERT(tmpbuffer != NULL);
+
+       bcopy(dp->i_df.if_u1.if_data, tmpbuffer, size);
+
+       sf = (xfs_dir_shortform_t *)tmpbuffer;
+       XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &inumber, ARCH_CONVERT);
+        
+       xfs_idata_realloc(dp, -size, XFS_DATA_FORK);    /* drop the inline data */
+       dp->i_d.di_size = 0;
+       xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
+       retval = xfs_da_grow_inode(iargs, &blkno);
+       if (retval)
+               goto out;
+
+       ASSERT(blkno == 0);
+       retval = xfs_dir_leaf_create(iargs, blkno, &bp);
+       if (retval)
+               goto out;
+       xfs_da_buf_done(bp);
+
+       args.name = ".";        /* first the entry for the directory itself */
+       args.namelen = 1;
+       args.hashval = xfs_dir_hash_dot;
+       args.inumber = dp->i_ino;
+       args.dp = dp;
+       args.firstblock = iargs->firstblock;
+       args.flist = iargs->flist;
+       args.total = iargs->total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = iargs->trans;
+       args.justcheck = 0;
+       args.addname = args.oknoent = 1;
+       retval = xfs_dir_leaf_addname(&args);
+       if (retval)
+               goto out;
+
+       args.name = "..";       /* then the parent saved from the header */
+       args.namelen = 2;
+       args.hashval = xfs_dir_hash_dotdot;
+       args.inumber = inumber;
+       retval = xfs_dir_leaf_addname(&args);
+       if (retval)
+               goto out;
+
+       sfe = &sf->list[0];     /* finally every saved shortform entry */
+       for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+               args.name = (char *)(sfe->name);
+               args.namelen = sfe->namelen;
+               args.hashval = xfs_da_hashname((char *)(sfe->name),
+                                              sfe->namelen);
+               XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args.inumber, ARCH_CONVERT);
+               retval = xfs_dir_leaf_addname(&args);
+               if (retval)
+                       goto out;
+               sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+       }
+       retval = 0;
+
+out:
+       kmem_free(tmpbuffer, size);
+       return(retval);
+}
+
+/*
+ * Look up a name in a shortform directory structure, replace the inode number.
+ *
+ * ".." replaces the parent field of the header; any other name is
+ * searched for among the stored entries.  Returns 0 once the new number
+ * has been stored and the inode data logged, ENOENT if the name is not
+ * present, or EIO if the inline data is smaller than a shortform header.
+ */
+int
+xfs_dir_shortform_replace(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp = args->dp;
+       xfs_dir_shortform_t *sf;
+       xfs_dir_sf_entry_t *ent;
+       int idx, nentries;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * A directory smaller than its own header is what is left when a
+        * shortform to leaf conversion fails part way through.
+        */
+       if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return XFS_ERROR(EIO);
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+
+       /*
+        * The parent lives in the header, not in the entry list.
+        * XXX - there is no check here that the number really changes.
+        */
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+               XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sf->hdr.parent, ARCH_CONVERT);
+               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+               return 0;
+       }
+       ASSERT(args->namelen != 1 || args->name[0] != '.');
+
+       /*
+        * Ordinary name: find its entry and overwrite the inode number,
+        * which is asserted to differ from the one already stored.
+        */
+       nentries = INT_GET(sf->hdr.count, ARCH_CONVERT);
+       for (idx = 0, ent = &sf->list[0];
+            idx < nentries;
+            idx++, ent = XFS_DIR_SF_NEXTENTRY(ent)) {
+               if (ent->namelen != args->namelen)
+                       continue;
+               if (ent->name[0] != args->name[0])
+                       continue;
+               if (bcmp(args->name, ent->name, args->namelen) != 0)
+                       continue;
+               ASSERT(bcmp((char *)&args->inumber,
+                       (char *)&ent->inumber, sizeof(xfs_ino_t)));
+               XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &ent->inumber, ARCH_CONVERT);
+               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+               return 0;
+       }
+
+       ASSERT(args->oknoent);
+       return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Convert a leaf directory to shortform structure
+ *
+ * The single leaf block (directory block 0) is copied into a temporary
+ * buffer and zeroed in place, the block is released from the inode, an
+ * empty shortform directory is created using the parent taken from the
+ * leaf's ".." entry, and every other entry except "." is re-added in
+ * shortform.  Returns 0, or the first error from reading the block,
+ * shrinking the inode or creating the shortform header.  The temporary
+ * buffer is freed on every path.
+ */
+int
+xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
+{
+       xfs_dir_leafblock_t *leaf;      /* saved copy of the leaf block */
+       xfs_dir_leaf_hdr_t *hdr;        /* header of that copy */
+       xfs_dir_leaf_entry_t *entry;    /* leaf entry being examined */
+       xfs_dir_leaf_name_t *namest;    /* name structure of that entry */
+       xfs_da_args_t args;             /* arguments for each shortform add */
+       xfs_inode_t *dp;                /* incore directory inode */
+       xfs_ino_t parent = 0;           /* stays 0 if the leaf has no ".." */
+       char *tmpbuffer;                /* holds the saved leaf block */
+       int retval, i;
+       xfs_dabuf_t *bp;                /* buffer for directory block 0 */
+
+       dp = iargs->dp;
+       tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
+       ASSERT(tmpbuffer != NULL);
+
+       retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
+                                              XFS_DATA_FORK);
+       if (retval)
+               goto out;       /* must still free tmpbuffer */
+       ASSERT(bp != NULL);
+       bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount));
+       leaf = (xfs_dir_leafblock_t *)tmpbuffer;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       bzero(bp->data, XFS_LBSIZE(dp->i_mount));
+
+       /*
+        * Find and special case the parent inode number
+        * "." and ".." get their nameidx zeroed in the saved copy so that
+        * the copy loop below skips them.
+        */
+       hdr = &leaf->hdr;
+       entry = &leaf->entries[0];
+       for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+               if ((entry->namelen == 2) &&
+                   (namest->name[0] == '.') &&
+                   (namest->name[1] == '.')) {
+                       XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &parent, ARCH_CONVERT);
+                       INT_ZERO(entry->nameidx, ARCH_CONVERT);
+               } else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
+                       INT_ZERO(entry->nameidx, ARCH_CONVERT);
+               }
+       }
+       retval = xfs_da_shrink_inode(iargs, 0, bp);
+       if (retval)
+               goto out;
+       retval = xfs_dir_shortform_create(iargs, parent);
+       if (retval)
+               goto out;
+
+       /*
+        * Copy the rest of the filenames
+        * NOTE(review): the result of xfs_dir_shortform_addname is not
+        * checked; presumably the caller has verified that every entry
+        * fits in the inode - confirm.
+        */
+       entry = &leaf->entries[0];
+       args.dp = dp;
+       args.firstblock = iargs->firstblock;
+       args.flist = iargs->flist;
+       args.total = iargs->total;
+       args.whichfork = XFS_DATA_FORK;
+       args.trans = iargs->trans;
+       args.justcheck = 0;
+       args.addname = args.oknoent = 1;
+       for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
+               if (INT_GET(entry->nameidx, ARCH_CONVERT) == 0)
+                       continue;
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+               args.name = (char *)(namest->name);
+               args.namelen = entry->namelen;
+               args.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+               XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args.inumber, ARCH_CONVERT);
+               xfs_dir_shortform_addname(&args);
+       }
+
+out:
+       kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+       return(retval);
+}
+
+/*
+ * Convert from using a single leaf to a root node and a leaf.
+ *
+ * The inode is grown by one directory block (asserted to be block 1),
+ * the existing leaf in block 0 is copied into it and logged, and block 0
+ * is then rebuilt as a node with a single btree entry that points at the
+ * moved leaf and carries the hash value of the leaf's last entry.
+ * Returns 0 on success or the first error from growing the inode,
+ * reading or getting a buffer, or creating the node.
+ */
+int
+xfs_dir_leaf_to_node(xfs_da_args_t *args)
+{
+       xfs_dir_leafblock_t *leaf;      /* the leaf, in its new block */
+       xfs_da_intnode_t *node;         /* the new root node */
+       xfs_inode_t *dp;                /* incore directory inode */
+       xfs_dabuf_t *bp1, *bp2;         /* buffers for block 0 and block 1 */
+       xfs_dablk_t blkno;              /* block number the leaf moves to */
+       int retval;
+
+       dp = args->dp;
+       retval = xfs_da_grow_inode(args, &blkno);
+       if (retval)
+               return(retval);
+       /*
+        * Only look at blkno once the grow is known to have succeeded.
+        */
+       ASSERT(blkno == 1);
+       retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
+                                             XFS_DATA_FORK);
+       if (retval)
+               return(retval);
+       ASSERT(bp1 != NULL);
+       retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
+                                            XFS_DATA_FORK);
+       if (retval) {
+               xfs_da_buf_done(bp1);
+               return(retval);
+       }
+       ASSERT(bp2 != NULL);
+       /*
+        * Move the leaf contents to the new block and log all of it.
+        */
+       bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+       xfs_da_buf_done(bp1);
+       xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+       /*
+        * Set up the new root node.
+        */
+       retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
+       if (retval) {
+               xfs_da_buf_done(bp2);
+               return(retval);
+       }
+       node = bp1->data;
+       leaf = bp2->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       /*
+        * The node's only entry takes the hash value of the leaf's last
+        * entry and points at the block the leaf now lives in.
+        */
+       INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+       xfs_da_buf_done(bp2);
+       INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
+       INT_SET(node->hdr.count, ARCH_CONVERT, 1);
+       xfs_da_log_buf(args->trans, bp1,
+               XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
+       xfs_da_buf_done(bp1);
+
+       return(retval);
+}
+
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Initialize directory block "blkno" as an empty leaf.
+ * Used both for a standalone leaf directory and for a leaf that
+ * belongs to a node directory.
+ *
+ * On success the buffer is handed back through *bpp and 0 is returned;
+ * if the buffer cannot be obtained, that error is returned instead.
+ */
+int
+xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_inode_t *dp = args->dp;
+       xfs_dabuf_t *bp;
+       int error;
+       int lbsize;
+
+       ASSERT(dp != NULL);
+       error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
+       if (error)
+               return(error);
+       ASSERT(bp != NULL);
+
+       lbsize = XFS_LBSIZE(dp->i_mount);
+       leaf = bp->data;
+       bzero((char *)leaf, lbsize);
+
+       /*
+        * Stamp the magic number, then describe everything between the
+        * end of the header and firstused as a single free region.
+        * If storing the block size leaves firstused reading back as
+        * zero, use lbsize - 1 instead.
+        */
+       INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
+       INT_SET(leaf->hdr.firstused, ARCH_CONVERT, lbsize);
+       if (INT_ISZERO(leaf->hdr.firstused, ARCH_CONVERT))
+               INT_SET(leaf->hdr.firstused, ARCH_CONVERT, lbsize - 1);
+       INT_SET(leaf->hdr.freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
+       INT_SET(leaf->hdr.freemap[0].size, ARCH_CONVERT, INT_GET(leaf->hdr.firstused, ARCH_CONVERT) - INT_GET(leaf->hdr.freemap[0].base, ARCH_CONVERT));
+
+       /* The whole block was rewritten, so log all of it. */
+       xfs_da_log_buf(args->trans, bp, 0, lbsize - 1);
+
+       *bpp = bp;
+       return(0);
+}
+
+/*
+ * Split a leaf: allocate and initialize a sibling leaf, rebalance the
+ * entries across the pair, link the sibling into the block chain and
+ * finally insert the new entry into whichever block was chosen.
+ *
+ * Returns 0 or the first error encountered.  The cached hashvals of
+ * both blocks are refreshed after the insert attempt, whatever its
+ * outcome.
+ */
+int
+xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+                                 xfs_da_state_blk_t *newblk)
+{
+       xfs_da_state_blk_t *target;
+       xfs_da_args_t *args = state->args;
+       xfs_dablk_t newbno;
+       int err;
+
+       ASSERT(args != NULL);
+       ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
+
+       /*
+        * Get a fresh block and set it up as an empty leaf.
+        */
+       err = xfs_da_grow_inode(args, &newbno);
+       if (err)
+               return(err);
+       err = xfs_dir_leaf_create(args, newbno, &newblk->bp);
+       if (err)
+               return(err);
+       newblk->magic = XFS_DIR_LEAF_MAGIC;
+       newblk->blkno = newbno;
+
+       /*
+        * Spread the existing entries over both leaves, then hook the
+        * new leaf into the sibling chain.
+        */
+       xfs_dir_leaf_rebalance(state, oldblk, newblk);
+       err = xfs_da_blk_link(state, oldblk, newblk);
+       if (err)
+               return(err);
+
+       /*
+        * state->inleaf tells us which of the two blocks takes the
+        * new entry.
+        */
+       target = state->inleaf ? oldblk : newblk;
+       err = xfs_dir_leaf_add(target->bp, args, target->index);
+
+       /*
+        * The insert may have changed the last hashval of either block.
+        */
+       oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
+       return(err);
+}
+
+/*
+ * Add a name to the leaf directory structure.
+ *
+ * Copes with fragmented leaves and with leaves whose freemap no longer
+ * describes all of the free space (ie: holes).
+ *
+ * Returns 0 if the entry fits (and, unless args->justcheck is set, has
+ * been inserted at "index"), ENOSPC if it cannot be made to fit, or
+ * the error from compaction.
+ */
+int
+xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_hdr_t *hdr;
+       xfs_dir_leaf_map_t *map;
+       int tablesize, entsize, freebytes, mapsize, need, musthave;
+       int error, i;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+       hdr = &leaf->hdr;
+       entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
+
+       /*
+        * First-fit search of the freemap, highest slot first.
+        * tablesize is the size of the header plus the entry table once
+        * one more entry has been added; if that would already run past
+        * firstused no region is usable and we only total up the free
+        * bytes.  A region that starts below firstused must also
+        * provide room for the new entry struct.
+        */
+       tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+                       + (uint)sizeof(xfs_dir_leaf_hdr_t);
+       freebytes = 0;
+       for (i = XFS_DIR_LEAF_MAPSIZE - 1; i >= 0; i--) {
+               map = &hdr->freemap[i];
+               mapsize = INT_GET(map->size, ARCH_CONVERT);
+               if (mapsize != 0 &&
+                   tablesize <= INT_GET(hdr->firstused, ARCH_CONVERT)) {
+                       need = entsize;
+                       if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+                               need += (uint)sizeof(xfs_dir_leaf_entry_t);
+                       if (mapsize >= need) {
+                               if (!args->justcheck)
+                                       xfs_dir_leaf_add_work(bp, args, index, i);
+                               return(0);
+                       }
+               }
+               freebytes += mapsize;
+       }
+
+       /*
+        * With no holes in the block and too little free space in
+        * total, compaction cannot help: give up now.
+        */
+       if (!hdr->holes && (freebytes < entsize))
+               return(XFS_ERROR(ENOSPC));
+
+       /*
+        * Compact the entries to coalesce free space.
+        * The justcheck flag is passed along so that a checking pass
+        * can report failure without changing anything.
+        */
+       musthave = 0;
+       if (args->total == 0)
+               musthave = entsize + (uint)sizeof(xfs_dir_leaf_entry_t);
+       error = xfs_dir_leaf_compact(args->trans, bp, musthave,
+                       args->justcheck);
+       if (error)
+               return(error);
+
+       /*
+        * After compaction, the block is guaranteed to have only one
+        * free region, in freemap[0].  If it is not big enough, give up.
+        */
+       if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
+           (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
+               return(XFS_ERROR(ENOSPC));
+
+       if (!args->justcheck)
+               xfs_dir_leaf_add_work(bp, args, index, 0);
+       return(0);
+}
+
+/*
+ * Add a name to a leaf directory structure.
+ *
+ * bp holds the leaf block, args supplies the name, namelen, hashval,
+ * inumber and transaction, index is the slot in the entry table that
+ * the new entry will occupy, and mapindex selects the freemap region
+ * the name bytes are carved from.  The space checks here are ASSERTs
+ * only, so the caller must already have picked a region that fits.
+ * Every modified range of the block is logged to args->trans.
+ */
+STATIC void
+xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
+                     int mapindex)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_hdr_t *hdr;
+       xfs_dir_leaf_entry_t *entry;
+       xfs_dir_leaf_name_t *namest;
+       xfs_dir_leaf_map_t *map;
+       /* REFERENCED */
+       xfs_mount_t *mp;
+       int tmp, i;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       hdr = &leaf->hdr;
+       ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
+       ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
+
+       /*
+        * Force open some space in the entry array and fill it in.
+        * Entries from "index" onwards slide up by one slot (the copy
+        * overlaps, hence ovbcopy); nothing moves when appending.
+        */
+       entry = &leaf->entries[index];
+       if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
+               tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
+               tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+               ovbcopy(entry, entry + 1, tmp);
+               xfs_da_log_buf(args->trans, bp,
+                   XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
+       }
+       INT_MOD(hdr->count, ARCH_CONVERT, +1);
+
+       /*
+        * Allocate space for the new string (at the end of the run).
+        * The region is shrunk by the size of the name structure and
+        * the name is placed at the new end of the region (base + size).
+        */
+       map = &hdr->freemap[mapindex];
+       mp = args->trans->t_mountp;
+       ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+       ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
+       ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+       INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
+       INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));
+       INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+       entry->namelen = args->namelen;
+       xfs_da_log_buf(args->trans, bp,
+           XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+
+       /*
+        * Copy the string and inode number into the new space.
+        */
+       namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+       XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &namest->inumber, ARCH_CONVERT);
+       bcopy(args->name, namest->name, args->namelen);
+       xfs_da_log_buf(args->trans, bp,
+           XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
+
+       /*
+        * Update the control info for this leaf node:
+        * lower firstused if the new name now starts below it, and
+        * trim one entry's worth off any free region that began exactly
+        * where the entry table used to end (tmp is the old table end),
+        * since the table has just grown into it.
+        */
+       if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+               INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);
+       ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
+       tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+                       + (uint)sizeof(xfs_dir_leaf_hdr_t);
+       map = &hdr->freemap[0];
+       for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
+               if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
+                       INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
+                       INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
+               }
+       }
+       INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
+       xfs_da_log_buf(args->trans, bp,
+               XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+}
+
+/*
+ * Garbage collect a leaf directory block by copying it to a new buffer.
+ *
+ * The block is copied aside, reinitialized as an empty leaf, and the
+ * entries are moved back so that the names end up packed together.
+ *
+ * musthave:  if non-zero, the number of free bytes freemap[0] must
+ *            offer after compaction; if it does not, the original
+ *            block contents are restored and ENOSPC is returned.
+ * justcheck: if non-zero, the original block contents are always
+ *            restored and nothing is logged.
+ *
+ * Returns 0 or ENOSPC.
+ */
+STATIC int
+xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
+                    int justcheck)
+{
+       xfs_dir_leafblock_t *leaf_s, *leaf_d;
+       xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
+       xfs_mount_t *mp;
+       char *tmpbuffer;
+       char *tmpbuffer2 = NULL;        /* backup copy, only if needed */
+       int rval;
+       int lbsize;
+
+       mp = trans->t_mountp;
+       lbsize = XFS_LBSIZE(mp);
+       tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
+       ASSERT(tmpbuffer != NULL);
+       bcopy(bp->data, tmpbuffer, lbsize);
+
+       /*
+        * Make a second copy in case xfs_dir_leaf_moveents()
+        * below destroys the original.  It is only needed when we may
+        * have to put the block back the way it was.
+        */
+       if (musthave || justcheck) {
+               tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
+               ASSERT(tmpbuffer2 != NULL);
+               bcopy(bp->data, tmpbuffer2, lbsize);
+       }
+       bzero(bp->data, lbsize);
+
+       /*
+        * Copy basic information
+        */
+       leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
+       leaf_d = bp->data;
+       hdr_s = &leaf_s->hdr;
+       hdr_d = &leaf_d->hdr;
+       hdr_d->info = hdr_s->info;      /* struct copy */
+       INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
+       if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0)
+               INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
+       INT_ZERO(hdr_d->namebytes, ARCH_CONVERT);
+       INT_ZERO(hdr_d->count, ARCH_CONVERT);
+       hdr_d->holes = 0;
+       INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
+       INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+
+       /*
+        * Copy all entry's in the same (sorted) order,
+        * but allocate filenames packed and in sequence.
+        * This changes the source (leaf_s) as well.
+        */
+       xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
+
+       if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)
+               rval = XFS_ERROR(ENOSPC);
+       else
+               rval = 0;
+
+       /*
+        * Either undo the compaction from the backup copy, or commit
+        * it by logging the whole block.  ENOSPC can only be set when
+        * musthave is non-zero, so a backup exists on both undo paths.
+        */
+       if (justcheck || rval == ENOSPC) {
+               ASSERT(tmpbuffer2 != NULL);
+               bcopy(tmpbuffer2, bp->data, lbsize);
+       } else {
+               xfs_da_log_buf(trans, bp, 0, lbsize - 1);
+       }
+
+       kmem_free(tmpbuffer, lbsize);
+       if (tmpbuffer2 != NULL)
+               kmem_free(tmpbuffer2, lbsize);
+       return(rval);
+}
+
+/*
+ * Redistribute the directory entries between two leaf nodes,
+ * taking into account the size of the new entry.
+ *
+ * NOTE: if new block is empty, then it will get the upper half of old block.
+ *
+ * On return state->inleaf says whether the new entry belongs in the
+ * block passed as blk1 (non-zero) or blk2 (zero), the hashval of each
+ * block has been refreshed, and blk2->index has been adjusted when the
+ * new entry goes into blk2.  Both buffers are logged in full if any
+ * entries were moved.
+ */
+STATIC void
+xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+                                     xfs_da_state_blk_t *blk2)
+{
+       xfs_da_state_blk_t *tmp_blk;
+       xfs_dir_leafblock_t *leaf1, *leaf2;
+       xfs_dir_leaf_hdr_t *hdr1, *hdr2;
+       int count, totallen, max, space, swap;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
+       ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
+       leaf1 = blk1->bp->data;
+       leaf2 = blk2->bp->data;
+       ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+
+       /*
+        * Check ordering of blocks, reverse if it makes things simpler.
+        * From here on blk1/leaf1 and blk2/leaf2 are local aliases and
+        * may refer to the caller's blocks in swapped order.
+        */
+       swap = 0;
+       if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
+               tmp_blk = blk1;
+               blk1 = blk2;
+               blk2 = tmp_blk;
+               leaf1 = blk1->bp->data;
+               leaf2 = blk2->bp->data;
+               swap = 1;
+       }
+       hdr1 = &leaf1->hdr;
+       hdr2 = &leaf2->hdr;
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.  Then get
+        * the direction to copy and the number of elements to move.
+        * The answer is relative to the (possibly swapped) local
+        * ordering, so flip it back if we swapped.
+        */
+       state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
+                                                          &count, &totallen);
+       if (swap)
+               state->inleaf = !state->inleaf;
+
+       /*
+        * Move any entries required from leaf to leaf:
+        * count is the number of entries leaf1 should end up with.
+        */
+       if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               count = INT_GET(hdr1->count, ARCH_CONVERT) - count;     /* number entries being moved */
+               space  = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
+               space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+               space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+               /*
+                * leaf2 is the destination, compact it if it looks tight.
+                * (compaction is requested with musthave and justcheck
+                * both zero)
+                */
+               max  = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+               max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+               if (space > max) {
+                       xfs_dir_leaf_compact(state->args->trans, blk2->bp,
+                                                                0, 0);
+               }
+
+               /*
+                * Move high entries from leaf1 to low end of leaf2.
+                */
+               xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
+                                            leaf2, 0, count, state->mp);
+
+               xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+                                                  state->blocksize-1);
+               xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+                                                  state->blocksize-1);
+
+       } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               count -= INT_GET(hdr1->count, ARCH_CONVERT);            /* number entries being moved */
+               space  = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
+               space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+               space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+               /*
+                * leaf1 is the destination, compact it if it looks tight.
+                * (compaction is requested with musthave and justcheck
+                * both zero)
+                */
+               max  = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+               max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+               if (space > max) {
+                       xfs_dir_leaf_compact(state->args->trans, blk1->bp,
+                                                                0, 0);
+               }
+
+               /*
+                * Move low entries from leaf2 to high end of leaf1.
+                */
+               xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
+                                            count, state->mp);
+
+               xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+                                                  state->blocksize-1);
+               xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+                                                  state->blocksize-1);
+       }
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        * NOTE(review): this indexes entries[count-1] in both leaves,
+        * so it presumably relies on neither leaf being empty at this
+        * point -- confirm against callers.
+        */
+       blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+       blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+       /*
+        * Adjust the expected index for insertion.
+        * GROT: this doesn't work unless blk2 was originally empty.
+        */
+       if (!state->inleaf) {
+               blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+       }
+}
+
+/*
+ * Examine entries until we reduce the absolute difference in
+ * byte usage between the two blocks to a minimum.
+ * GROT: Is this really necessary?  With other than a 512 byte blocksize,
+ * GROT: there will always be enough room in either block for a new entry.
+ * GROT: Do a double-split for this case?
+ */
+STATIC int
+xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
+                                          xfs_da_state_blk_t *blk1,
+                                          xfs_da_state_blk_t *blk2,
+                                          int *countarg, int *namebytesarg)
+{
+       xfs_dir_leafblock_t *leaf1, *leaf2;
+       xfs_dir_leaf_hdr_t *hdr1, *hdr2;
+       xfs_dir_leaf_entry_t *entry;
+       int count, max, totallen, half;
+       int lastdelta, foundit, tmp;
+
+       /*
+        * Set up environment.
+        */
+       leaf1 = blk1->bp->data;
+       leaf2 = blk2->bp->data;
+       hdr1 = &leaf1->hdr;
+       hdr2 = &leaf2->hdr;
+       foundit = 0;
+       totallen = 0;
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.
+        */
+       max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT);
+       half  = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
+       half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen;
+       half /= 2;
+       lastdelta = state->blocksize;
+       entry = &leaf1->entries[0];
+       for (count = 0; count < max; entry++, count++) {
+
+#define XFS_DIR_ABS(A) (((A) < 0) ? -(A) : (A))
+               /*
+                * The new entry is in the first block, account for it.
+                */
+               if (count == blk1->index) {
+                       tmp = totallen + (uint)sizeof(*entry)
+                               + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
+                       if (XFS_DIR_ABS(half - tmp) > lastdelta)
+                               break;
+                       lastdelta = XFS_DIR_ABS(half - tmp);
+                       totallen = tmp;
+                       foundit = 1;
+               }
+
+               /*
+                * Wrap around into the second block if necessary.
+                */
+               if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
+                       leaf1 = leaf2;
+                       entry = &leaf1->entries[0];
+               }
+
+               /*
+                * Figure out if next leaf entry would be too much.
+                */
+               tmp = totallen + (uint)sizeof(*entry)
+                               + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
+               if (XFS_DIR_ABS(half - tmp) > lastdelta)
+                       break;
+               lastdelta = XFS_DIR_ABS(half - tmp);
+               totallen = tmp;
+#undef XFS_DIR_ABS
+       }
+
+       /*
+        * Calculate the number of namebytes that will end up in lower block.
+        * If new entry not in lower block, fix up the count.
+        */
+       totallen -=
+               count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
+       if (foundit) {
+               totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
+                           state->args->namelen;
+       }
+
+       *countarg = count;
+       *namebytesarg = totallen;
+       return(foundit);
+}
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ *
+ * The verdict is reported through *action:
+ *   0 - nothing can be done (this includes the block being over
+ *       50% full, in which case joining is not even attempted),
+ *   1 - the block can be collapsed into a sibling; the state
+ *       structure has been filled in,
+ *   2 - the block is empty; the state structure has been filled in.
+ *
+ * The function's return value is 0, or an error from reading a
+ * sibling or shifting a path.
+ */
+int
+xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_da_state_blk_t *blk;
+       xfs_da_blkinfo_t *info;
+       int count, bytes, forward, error, retval, i;
+       xfs_dablk_t blkno;
+       xfs_dabuf_t *bp;
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       info = blk->bp->data;
+       ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       leaf = (xfs_dir_leafblock_t *)info;
+       count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
+               count * (uint)sizeof(xfs_dir_leaf_entry_t) +
+               count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
+               INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+       if (bytes > (state->blocksize >> 1)) {
+               *action = 0;    /* blk over 50%, dont try to join */
+               return(0);
+       }
+
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = !INT_ISZERO(info->forw, ARCH_CONVERT);
+               bcopy(&state->path, &state->altpath, sizeof(state->path));
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error)
+                       return(error);
+               if (retval) {
+                       *action = 0;
+               } else {
+                       *action = 2;
+               }
+               return(0);
+       }
+
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink a directory over time.
+        * For each candidate, "bytes" starts at 75% of the block size
+        * and has the combined usage of this block and the sibling
+        * subtracted from it; a non-negative result means they fit.
+        * A sibling pointer of 0 means there is no block that way.
+        */
+       forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT));      /* start with smaller blk num */
+       for (i = 0; i < 2; forward = !forward, i++) {
+               if (forward)
+                       blkno = INT_GET(info->forw, ARCH_CONVERT);
+               else
+                       blkno = INT_GET(info->back, ARCH_CONVERT);
+               if (blkno == 0)
+                       continue;
+               error = xfs_da_read_buf(state->args->trans, state->args->dp,
+                                                           blkno, -1, &bp,
+                                                           XFS_DATA_FORK);
+               if (error)
+                       return(error);
+               ASSERT(bp != NULL);
+
+               leaf = (xfs_dir_leafblock_t *)info;
+               count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               bytes  = state->blocksize - (state->blocksize>>2);
+               bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+               leaf = bp->data;
+               ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+               count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+               bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
+               bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
+               bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
+               if (bytes >= 0)
+                       break;  /* fits with at least 25% to spare */
+
+               xfs_da_brelse(state->args->trans, bp);
+       }
+       if (i >= 2) {
+               *action = 0;
+               return(0);
+       }
+       xfs_da_buf_done(bp);
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        * Both start as copies of the current path; whichever one must
+        * refer to the sibling is then shifted in the chosen direction.
+        */
+       bcopy(&state->path, &state->altpath, sizeof(state->path));
+       if (blkno < blk->blkno) {
+               error = xfs_da_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+       } else {
+               error = xfs_da_path_shift(state, &state->path, forward,
+                                                0, &retval);
+       }
+       if (error)
+               return(error);
+       if (retval) {
+               *action = 0;
+       } else {
+               *action = 1;
+       }
+       return(0);
+}
+
+/*
+ * Remove a name from the leaf directory structure.
+ *
+ * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
+ * If two leaves are 37% full, when combined they will leave 25% free.
+ */
+int
+xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_hdr_t *hdr;
+       xfs_dir_leaf_map_t *map;
+       xfs_dir_leaf_entry_t *entry;
+       xfs_dir_leaf_name_t *namest;
+       int before, after, smallest, entsize;
+       int tablesize, tmp, i;
+       xfs_mount_t *mp;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       hdr = &leaf->hdr;
+       mp = trans->t_mountp;
+       ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
+       ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT)));
+       ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
+       entry = &leaf->entries[index];
+       ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
+       ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
+
+       /*
+        * Scan through free region table:
+        *    check for adjacency of free'd entry with an existing one,
+        *    find smallest free region in case we need to replace it,
+        *    adjust any map that borders the entry table,
+        */
+       tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
+                       + (uint)sizeof(xfs_dir_leaf_hdr_t);
+       map = &hdr->freemap[0];
+       tmp = INT_GET(map->size, ARCH_CONVERT);
+       before = after = -1;
+       smallest = XFS_DIR_LEAF_MAPSIZE - 1;
+       entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
+       for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
+               ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+               ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+               if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
+                       INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
+                       INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
+               }
+
+               if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) {
+                       before = i;
+               } else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
+                       after = i;
+               } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
+                       tmp = INT_GET(map->size, ARCH_CONVERT);
+                       smallest = i;
+               }
+       }
+
+       /*
+        * Coalesce adjacent freemap regions,
+        * or replace the smallest region.
+        */
+       if ((before >= 0) || (after >= 0)) {
+               if ((before >= 0) && (after >= 0)) {
+                       map = &hdr->freemap[before];
+                       INT_MOD(map->size, ARCH_CONVERT, entsize);
+                       INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
+                       INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT);
+                       INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT);
+               } else if (before >= 0) {
+                       map = &hdr->freemap[before];
+                       INT_MOD(map->size, ARCH_CONVERT, entsize);
+               } else {
+                       map = &hdr->freemap[after];
+                       INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); 
+                       INT_MOD(map->size, ARCH_CONVERT, entsize);
+               }
+       } else {
+               /*
+                * Replace smallest region (if it is smaller than free'd entry)
+                */
+               map = &hdr->freemap[smallest];
+               if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
+                       INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); 
+                       INT_SET(map->size, ARCH_CONVERT, entsize);
+               }
+       }
+
+       /*
+        * Did we remove the first entry?
+        */
+       if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT))
+               smallest = 1;
+       else
+               smallest = 0;
+
+       /*
+        * Compress the remaining entries and zero out the removed stuff.
+        */
+       namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+       bzero((char *)namest, entsize);
+       xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
+
+       INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
+       tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
+       ovbcopy(entry + 1, entry, tmp);
+       INT_MOD(hdr->count, ARCH_CONVERT, -1);
+       xfs_da_log_buf(trans, bp,
+           XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
+       entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
+       bzero((char *)entry, sizeof(xfs_dir_leaf_entry_t));
+
+       /*
+        * If we removed the first entry, re-find the first used byte
+        * in the name area.  Note that if the entry was the "firstused",
+        * then we don't have a "hole" in our block resulting from
+        * removing the name.
+        */
+       if (smallest) {
+               tmp = XFS_LBSIZE(mp);
+               entry = &leaf->entries[0];
+               for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
+                       ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
+                       ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
+                       if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
+                               tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
+               }
+               INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
+               if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0)
+                       INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1);
+       } else {
+               hdr->holes = 1;         /* mark as needing compaction */
+       }
+
+       xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+
+       /*
+        * Check if leaf is less than 50% full, caller may want to
+        * "join" the leaf with a sibling if so.
+        */
+       tmp  = (uint)sizeof(xfs_dir_leaf_hdr_t);
+       tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+       tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
+       tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+       if (tmp < mp->m_dir_magicpct)
+               return(1);                      /* leaf is < 37% full */
+       return(0);
+}
+
+/*
+ * Move all the directory entries from drop_blk's leaf into save_blk's leaf.
+ * state supplies the mount, block size and transaction (state->args->trans).
+ * On return drop_blk->hashval holds the drop leaf's pre-move last hashval,
+ * save_blk->hashval the merged leaf's, and all of save_blk->bp is logged.
+ */
+void
+xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+                                     xfs_da_state_blk_t *save_blk)
+{
+       xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
+       xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
+       xfs_mount_t *mp;
+       char *tmpbuffer;
+
+       /* Set up environment. */
+       mp = state->mp;
+       ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
+       ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
+       drop_leaf = drop_blk->bp->data;
+       save_leaf = save_blk->bp->data;
+       ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       drop_hdr = &drop_leaf->hdr;
+       save_hdr = &save_leaf->hdr;
+
+       /*
+        * Save last hashval from dying block for later Btree fixup.  hdr.count
+        * is read through INT_GET everywhere else, so do so before indexing too.
+        */
+       drop_blk->hashval = INT_GET(drop_leaf->entries[ INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+       /*
+        * Check if we need a temp buffer, or can we do it in place.  The dying leaf
+        * is not checked for holes: we always drop it, toosmall() decided that already.
+        */
+       if (save_hdr->holes == 0) {
+               /*
+                * dest leaf has no holes, so we add there: in front of its entries
+                * when xfs_dir_leaf_order() is nonzero, otherwise after them.
+                */
+               if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
+                       xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
+                                                (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+               } else {
+                       xfs_dir_leaf_moveents(drop_leaf, 0,
+                                             save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT),
+                                             (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+               }
+       } else {
+               /*
+                * Destination has holes, so build a zeroed scratch leaf, move both leaves
+                * into it (order chosen by xfs_dir_leaf_order()), then copy it over save_leaf.
+                */
+               tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
+               ASSERT(tmpbuffer != NULL);
+               bzero(tmpbuffer, state->blocksize);
+               tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
+               tmp_hdr = &tmp_leaf->hdr;
+               tmp_hdr->info = save_hdr->info; /* struct copy */
+               INT_ZERO(tmp_hdr->count, ARCH_CONVERT);
+               INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
+               if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0)     /* stored value read back as 0 */
+                       INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1);
+               INT_ZERO(tmp_hdr->namebytes, ARCH_CONVERT);
+               if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
+                       xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
+                                                (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+                       xfs_dir_leaf_moveents(save_leaf, 0,
+                                             tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+                                             (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
+               } else {
+                       xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
+                                                (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
+                       xfs_dir_leaf_moveents(drop_leaf, 0,
+                                             tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
+                                             (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
+               }
+               bcopy(tmp_leaf, save_leaf, state->blocksize);
+               kmem_free(tmpbuffer, state->blocksize);
+       }
+
+       xfs_da_log_buf(state->args->trans, save_blk->bp, 0, state->blocksize - 1);
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        */
+       save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+}
+
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Look up args->name (hash args->hashval) in the leaf held in the
+ * caller's buffer bp.  This is the internal routine.
+ *
+ * Duplicate hashvals are allowed, but only this leaf is searched; the
+ * Btree code must check in adjacent leaf nodes.
+ *
+ * Returns XFS_ERROR(EEXIST) with *index = the matching entry and
+ * args->inumber filled in, or XFS_ERROR(ENOENT) with *index = the slot
+ * where the entry should have been (insert before that entry); in the
+ * ENOENT case args->inumber is left untouched.
+ */
+int
+xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
+{
+       xfs_dir_leafblock_t *leaf;
+       xfs_dir_leaf_entry_t *entry;
+       xfs_dir_leaf_name_t *namest;
+       int probe, span;
+       xfs_dahash_t hashval;
+
+       leaf = bp->data;
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
+
+       /*
+        * Binary search by hashval until an exact hit or span <= 4.  (note: small blocks will skip this loop)
+        */
+       hashval = args->hashval;
+       probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
+       for (entry = &leaf->entries[probe]; span > 4;
+                  entry = &leaf->entries[probe]) {
+               span /= 2;
+               if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
+                       probe += span;
+               else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
+                       probe -= span;
+               else
+                       break;
+       }
+       ASSERT((probe >= 0) && \
+              ((INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
+       ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval));
+
+       /*
+        * Since we may have duplicate hashval's, back up past entries >= hashval,
+        * then advance to the first entry whose hashval is not below the target.
+        */
+       while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) {
+               entry--;
+               probe--;
+       }
+       while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
+               entry++;
+               probe++;
+       }
+       if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
+               *index = probe;         /* no entry with this hashval: report the insertion slot */
+               ASSERT(args->oknoent);
+               return(XFS_ERROR(ENOENT));
+       }
+
+       /*
+        * Duplicate keys may be present, so compare length, first byte and then the full name of each one.
+        */
+       while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) {
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+               if (entry->namelen == args->namelen &&
+                   namest->name[0] == args->name[0] &&
+                   bcmp(args->name, namest->name, args->namelen) == 0) {
+                       XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args->inumber, ARCH_CONVERT);
+                       *index = probe;
+                       return(XFS_ERROR(EEXIST));
+               }
+               entry++;
+               probe++;
+       }
+       *index = probe;         /* just past the last entry with this hashval */
+       ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+       return(XFS_ERROR(ENOENT));
+}
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Move count entries from leaf_s (starting at start_s) into leaf_d at start_d.
+ * NOTE: this routine modifies both source and destination leaves (source gets holes = 1).
+ */
+/* ARGSUSED */
+STATIC void
+xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
+                     xfs_dir_leafblock_t *leaf_d, int start_d,
+                     int count, xfs_mount_t *mp)
+{
+       xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
+       xfs_dir_leaf_entry_t *entry_s, *entry_d;
+       int tmp, i;
+
+       /*
+        * Check for nothing to do.
+        */
+       if (count == 0)
+               return;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       hdr_s = &leaf_s->hdr;
+       hdr_d = &leaf_d->hdr;
+       ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
+       ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= 
+               ((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s)));
+       ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
+       ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= 
+               ((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d)));
+
+       ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
+       ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
+       ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
+
+       /*
+        * Inserting before existing destination entries?  Shift them up by count slots to make a hole.
+        */
+       if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
+               tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
+               tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+               entry_s = &leaf_d->entries[start_d];
+               entry_d = &leaf_d->entries[start_d + count];
+               bcopy(entry_s, entry_d, tmp);
+       }
+
+       /*
+        * Copy all entries in the same (sorted) order, but allocate filenames
+        * packed and in sequence, each just below hdr_d's current firstused.
+        */
+       entry_s = &leaf_s->entries[start_s];
+       entry_d = &leaf_d->entries[start_d];
+       for (i = 0; i < count; entry_s++, entry_d++, i++) {
+               ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
+               ASSERT(entry_s->namelen < MAXNAMELEN);
+               tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
+               INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp));
+               entry_d->hashval = entry_s->hashval; /* INT_: direct copy */
+               INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT); 
+               entry_d->namelen = entry_s->namelen;
+               ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
+               bcopy(XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+                     XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), tmp);
+               ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
+               bzero((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
+                     tmp);
+               INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
+               INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
+               INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
+               INT_MOD(hdr_d->count, ARCH_CONVERT, +1);
+               tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
+                               + (uint)sizeof(xfs_dir_leaf_hdr_t);
+               ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
+
+       }
+
+       /*
+        * Zero out the entries we just copied; if they were the tail of leaf_s there is nothing to shift.
+        */
+       if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
+               tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
+               entry_s = &leaf_s->entries[start_s];
+               ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
+               bzero((char *)entry_s, tmp);
+       } else {
+               /*
+                * Move the remaining entries down to fill the hole,
+                * then zero the entries at the top.
+                */
+               tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - count;     /* NOTE(review): hdr_s->count was already decremented once per moved entry in the loop above; confirm subtracting count again is intended */
+               tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+               entry_s = &leaf_s->entries[start_s + count];
+               entry_d = &leaf_s->entries[start_s];
+               bcopy(entry_s, entry_d, tmp);
+
+               tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
+               entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
+               ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
+               bzero((char *)entry_s, tmp);
+       }
+
+       /*
+        * Fill in the freemap: [0] = gap between the entry array and firstused; [1] takes the value of INT_ZERO([2]) (presumably 0).
+        */
+       INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t));
+       INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t));
+       INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+       INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].base, ARCH_CONVERT));
+       INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].size, ARCH_CONVERT));
+       hdr_s->holes = 1;       /* leaf may not be compact */
+}
+
+/*
+ * Compare the "order" of two leaf blocks.  Returns 1 when both leaves are
+ * non-empty and leaf2's first or last hashval is below leaf1's, else 0.
+ */
+int
+xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+{
+       xfs_dir_leafblock_t *leaf1 = leaf1_bp->data;
+       xfs_dir_leafblock_t *leaf2 = leaf2_bp->data;
+       int n1, n2;             /* entry counts of leaf1 / leaf2 */
+       ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) &&
+              (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC));
+       n1 = INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+       n2 = INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+       if (n1 <= 0 || n2 <= 0)         /* an empty leaf never reorders */
+               return(0);
+       return((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
+               INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
+              (INT_GET(leaf2->entries[ n2-1 ].hashval, ARCH_CONVERT) <
+               INT_GET(leaf1->entries[ n1-1 ].hashval, ARCH_CONVERT)));
+}
+
+/*
+ * Return the hashval of the final entry in the leaf (0 for an empty leaf);
+ * when count is non-NULL the number of entries is also stored through it.
+ */
+xfs_dahash_t
+xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+{
+       xfs_dir_leafblock_t *leaf = bp->data;
+       int nents;              /* entries currently in the leaf */
+
+       ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+       nents = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       if (count)
+               *count = nents;
+       return(nents ? INT_GET(leaf->entries[ nents-1 ].hashval, ARCH_CONVERT) : 0);
+}
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
new file mode 100644 (file)
index 0000000..92e2818
--- /dev/null
@@ -0,0 +1,1113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Internal functions.
+ */
+
+/*
+ * Log the fields selected by the fields bitmask for the inode at index off
+ * inside the inode buffer bp.  The bitmask is converted to an inode-relative
+ * byte range, which is then shifted to the inode's position in the buffer
+ * and passed to xfs_trans_log_buf().
+ */
+STATIC void
+xfs_ialloc_log_di(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *bp,            /* inode buffer */
+       int             off,            /* index of inode in buffer */
+       int             fields)         /* bitmask of fields to log */
+{
+       int                     first_byte;     /* first byte to log */
+       int                     last_byte;      /* last byte to log */
+       int                     ino_base;       /* byte offset of inode in bp */
+       static const short      byte_offsets[] = {      /* field offsets */
+                                               /* keep in sync with bits */
+               offsetof(xfs_dinode_core_t, di_magic),
+               offsetof(xfs_dinode_core_t, di_mode),
+               offsetof(xfs_dinode_core_t, di_version),
+               offsetof(xfs_dinode_core_t, di_format),
+               offsetof(xfs_dinode_core_t, di_onlink),
+               offsetof(xfs_dinode_core_t, di_uid),
+               offsetof(xfs_dinode_core_t, di_gid),
+               offsetof(xfs_dinode_core_t, di_nlink),
+               offsetof(xfs_dinode_core_t, di_projid),
+               offsetof(xfs_dinode_core_t, di_pad),
+               offsetof(xfs_dinode_core_t, di_atime),
+               offsetof(xfs_dinode_core_t, di_mtime),
+               offsetof(xfs_dinode_core_t, di_ctime),
+               offsetof(xfs_dinode_core_t, di_size),
+               offsetof(xfs_dinode_core_t, di_nblocks),
+               offsetof(xfs_dinode_core_t, di_extsize),
+               offsetof(xfs_dinode_core_t, di_nextents),
+               offsetof(xfs_dinode_core_t, di_anextents),
+               offsetof(xfs_dinode_core_t, di_forkoff),
+               offsetof(xfs_dinode_core_t, di_aformat),
+               offsetof(xfs_dinode_core_t, di_dmevmask),
+               offsetof(xfs_dinode_core_t, di_dmstate),
+               offsetof(xfs_dinode_core_t, di_flags),
+               offsetof(xfs_dinode_core_t, di_gen),
+               offsetof(xfs_dinode_t, di_next_unlinked),
+               offsetof(xfs_dinode_t, di_u),
+               offsetof(xfs_dinode_t, di_a),
+               sizeof(xfs_dinode_t)
+       };
+
+       ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
+       ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
+
+       /*
+        * Inode-relative first and last bytes covering the requested fields.
+        */
+       xfs_btree_offsets(fields, byte_offsets, XFS_DI_NUM_BITS, &first_byte, &last_byte);
+
+       /*
+        * Shift by the inode's byte position in the buffer and log the range.
+        */
+       ino_base = off << tp->t_mountp->m_sb.sb_inodelog;
+       xfs_trans_log_buf(tp, bp, first_byte + ino_base, last_byte + ino_base);
+}
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate new inodes in the allocation group specified by agbp.
+ * Return 0 for success, else error code.
+ */
+STATIC int                             /* error code or 0 */
+xfs_ialloc_ag_alloc(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *agbp,          /* alloc group buffer */
+       int             *alloc)
+{
+       xfs_agi_t       *agi;           /* allocation group header */
+       xfs_alloc_arg_t args;           /* allocation argument structure */
+       int             blks_per_cluster;  /* fs blocks per inode cluster */
+       xfs_btree_cur_t *cur;           /* inode btree cursor */
+       xfs_daddr_t             d;              /* disk addr of buffer */
+       int             error;
+       xfs_buf_t       *fbuf;          /* new free inodes' buffer */
+       xfs_dinode_t    *free;          /* new free inode structure */
+       int             i;              /* inode counter */
+       int             j;              /* block counter */
+       int             nbufs;          /* num bufs of new inodes */
+       xfs_agino_t     newino;         /* new first inode's number */
+       xfs_agino_t     newlen;         /* new number of inodes */
+       int             ninodes;        /* num inodes per buf */
+       xfs_agino_t     thisino;        /* current inode number, for loop */
+       int             version;        /* inode version number to use */
+       static xfs_timestamp_t ztime;   /* zero xfs timestamp */
+       int             isaligned;      /* inode allocation at stripe unit */
+                                       /* boundary */
+        xfs_dinode_core_t dic;          /* a dinode_core to copy to new */
+                                        /* inodes */
+        
+       args.tp = tp;
+       args.mp = tp->t_mountp;
+
+       /*
+        * Locking will ensure that we don't have two callers in here
+        * at one time.
+        */
+       newlen = XFS_IALLOC_INODES(args.mp);
+       if (args.mp->m_maxicount &&
+           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+               return XFS_ERROR(ENOSPC);
+       args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
+       /*
+        * Set the alignment for the allocation.
+        * If stripe alignment is turned on then align at stripe unit
+        * boundary.
+        * If the cluster size is smaller than a filesystem block 
+        * then we're doing I/O for inodes in filesystem block size pieces,
+        * so don't need alignment anyway.
+        */
+       isaligned = 0;
+       if (args.mp->m_sinoalign) {
+               ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+               args.alignment = args.mp->m_dalign;
+               isaligned = 1;
+       } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+           args.mp->m_sb.sb_inoalignmt >= 
+           XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+               args.alignment = args.mp->m_sb.sb_inoalignmt;
+       else
+               args.alignment = 1;
+       agi = XFS_BUF_TO_AGI(agbp);
+       /*
+        * Need to figure out where to allocate the inode blocks.
+        * Ideally they should be spaced out through the a.g.
+        * For now, just allocate blocks up front.
+        */
+       args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+       args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                                   args.agbno);
+       /*
+        * Allocate a fixed-size extent of inodes.
+        */
+       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       args.mod = args.total = args.wasdel = args.isfl = args.userdata = 
+               args.minalignslop = 0;
+       args.prod = 1;
+       /*
+        * Allow space for the inode btree to split.
+        */
+       args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+       if (error = xfs_alloc_vextent(&args))
+               return error;
+
+       /*
+        * If stripe alignment is turned on, then try again with cluster
+        * alignment.
+        */
+       if (isaligned && args.fsbno == NULLFSBLOCK) {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                               INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno);
+               if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+                       args.mp->m_sb.sb_inoalignmt >= 
+                       XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+                               args.alignment = args.mp->m_sb.sb_inoalignmt;
+               else
+                       args.alignment = 1;
+               if (error = xfs_alloc_vextent(&args))
+                                return error;
+       }
+       
+       if (args.fsbno == NULLFSBLOCK) {
+               *alloc = 0;
+               return 0;
+       }
+       ASSERT(args.len == args.minlen);
+       /*
+        * Convert the results.
+        */
+       newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+       /*
+        * Loop over the new block(s), filling in the inodes.
+        * For small block sizes, manipulate the inodes in buffers
+        * which are multiples of the blocks size.
+        */
+       if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
+               blks_per_cluster = 1;
+               nbufs = (int)args.len;
+               ninodes = args.mp->m_sb.sb_inopblock;
+       } else {
+               blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
+                                  args.mp->m_sb.sb_blocksize;
+               nbufs = (int)args.len / blks_per_cluster;
+               ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
+       }
+       /*
+        * Figure out what version number to use in the inodes we create.
+        * If the superblock version has caught up to the one that supports
+        * the new inode format, then use the new inode version.  Otherwise
+        * use the old version so that old kernels will continue to be
+        * able to use the file system.
+        */
+       if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
+               version = XFS_DINODE_VERSION_2;
+       else
+               version = XFS_DINODE_VERSION_1;
+       for (j = 0; j < nbufs; j++) {
+               /*
+                * Get the block.
+                */
+               d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                                    args.agbno + (j * blks_per_cluster));
+               fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
+                                        args.mp->m_bsize * blks_per_cluster,
+                                        XFS_BUF_LOCK);
+               ASSERT(fbuf);
+               ASSERT(!XFS_BUF_GETERROR(fbuf));                
+               /*
+                * Loop over the inodes in this buffer.
+                */
+               INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+               INT_ZERO(dic.di_mode, ARCH_CONVERT);
+               INT_SET(dic.di_version, ARCH_CONVERT, version);
+               INT_ZERO(dic.di_format, ARCH_CONVERT);
+               INT_ZERO(dic.di_onlink, ARCH_CONVERT);
+               INT_ZERO(dic.di_uid, ARCH_CONVERT);
+               INT_ZERO(dic.di_gid, ARCH_CONVERT);
+               INT_ZERO(dic.di_nlink, ARCH_CONVERT);
+               INT_ZERO(dic.di_projid, ARCH_CONVERT);
+               bzero(&(dic.di_pad[0]),sizeof(dic.di_pad));
+               INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
+               INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+               INT_SET(dic.di_mtime.t_sec, ARCH_CONVERT, ztime.t_sec);
+               INT_SET(dic.di_mtime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+               INT_SET(dic.di_ctime.t_sec, ARCH_CONVERT, ztime.t_sec);
+               INT_SET(dic.di_ctime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+                
+               INT_ZERO(dic.di_size, ARCH_CONVERT);
+               INT_ZERO(dic.di_nblocks, ARCH_CONVERT);
+               INT_ZERO(dic.di_extsize, ARCH_CONVERT);
+               INT_ZERO(dic.di_nextents, ARCH_CONVERT);
+               INT_ZERO(dic.di_anextents, ARCH_CONVERT);
+               INT_ZERO(dic.di_forkoff, ARCH_CONVERT);
+               INT_ZERO(dic.di_aformat, ARCH_CONVERT);
+               INT_ZERO(dic.di_dmevmask, ARCH_CONVERT);
+               INT_ZERO(dic.di_dmstate, ARCH_CONVERT);
+               INT_ZERO(dic.di_flags, ARCH_CONVERT);
+               INT_ZERO(dic.di_gen, ARCH_CONVERT);
+                
+               for (i = 0; i < ninodes; i++) {
+                       free = XFS_MAKE_IPTR(args.mp, fbuf, i);
+                        bcopy (&dic, &(free->di_core), sizeof(xfs_dinode_core_t));
+                       INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
+                       xfs_ialloc_log_di(tp, fbuf, i,
+                               XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
+               }
+               xfs_trans_inode_alloc_buf(tp, fbuf);
+       }
+       INT_MOD(agi->agi_count, ARCH_CONVERT, newlen);
+       INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen);
+       mraccess(&args.mp->m_peraglock);
+       args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen;
+       mraccunlock(&args.mp->m_peraglock);
+       INT_SET(agi->agi_newino, ARCH_CONVERT, newino);
+       /*
+        * Insert records describing the new inode chunk into the btree.
+        */
+       cur = xfs_btree_init_cursor(args.mp, tp, agbp,
+                       INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                       XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+       for (thisino = newino;
+            thisino < newino + newlen;
+            thisino += XFS_INODES_PER_CHUNK) {
+               if (error = xfs_inobt_lookup_eq(cur, thisino,
+                               XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i)) {
+                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               ASSERT(i == 0);
+               if (error = xfs_inobt_insert(cur, &i)) {
+                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               ASSERT(i == 1);
+       }
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       /*
+        * Log allocation group header fields
+        */
+       xfs_ialloc_log_agi(tp, agbp,
+               XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
+       /*
+        * Modify/log superblock values for inode count and inode free count.
+        */
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
+       *alloc = 1;
+       return 0;
+}
+
+/*
+ * Select an allocation group to look for a free inode in, based on the parent
+ * inode and the mode.  Return the allocation group buffer.
+ *
+ * For a directory the starting ag is the value returned by
+ * atomicIncWithWrap() on m_agirotor; for anything else it is the ag of
+ * the parent inode.  The ags are then visited in increasing order, wrapping
+ * from the last ag back to 0.  The scan is made at most twice: the first
+ * pass hands XFS_ALLOC_FLAG_TRYLOCK to xfs_alloc_pagf_init(), the second
+ * pass hands it no flags at all.
+ *
+ * A null buffer pointer is returned if no ag qualifies after both passes,
+ * or as soon as a forced shutdown of the filesystem is noticed.
+ */
+STATIC xfs_buf_t *                     /* allocation group buffer */
+xfs_ialloc_ag_select(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_ino_t       parent,         /* parent directory inode number */
+       mode_t          mode,           /* bits set to indicate file type */
+       int             okalloc)        /* ok to allocate more space */
+{
+       xfs_buf_t       *agbp;          /* allocation group header buffer */
+       xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
+       xfs_agnumber_t  agno;           /* current ag number */
+       int             flags;          /* alloc buffer locking flags */
+       xfs_extlen_t    ineed;          /* blocks needed for inode allocation */
+       xfs_extlen_t    longest;        /* longest extent available */
+       xfs_mount_t     *mp;            /* mount point structure */
+       int             needspace;      /* file mode implies space allocated */
+       xfs_perag_t     *pag;           /* per allocation group data */
+       xfs_agnumber_t  pagno;          /* parent (starting) ag number */
+
+       /*
+        * Files of these types need at least one block if length > 0
+        * (and they won't fit in the inode, but that's hard to figure out).
+        */
+       needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
+       mp = tp->t_mountp;
+       agcount = mp->m_sb.sb_agcount;
+       if (S_ISDIR(mode))
+               pagno = atomicIncWithWrap((int *)&mp->m_agirotor, agcount);
+       else
+               pagno = XFS_INO_TO_AGNO(mp, parent);
+       ASSERT(pagno < agcount);
+       /*
+        * Loop through allocation groups, looking for one with a little
+        * free space in it.  Note we don't look for free inodes, exactly.
+        * Instead, we include whether there is a need to allocate inodes
+        * to mean that blocks must be allocated for them,
+        * if none are currently free.
+        */
+       agno = pagno;
+       flags = XFS_ALLOC_FLAG_TRYLOCK;
+       for (;;) {
+               mraccess(&mp->m_peraglock);
+               pag = &mp->m_perag[agno];
+               if (!pag->pagi_init) {
+                       if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+                               agbp = NULL;
+                               mraccunlock(&mp->m_peraglock);
+                               goto nextag;
+                       }
+               } else
+                       agbp = NULL;
+               /*
+                * Is there enough free space for the file plus a block
+                * of inodes (if we need to allocate some)?
+                *
+                * When the ag already has free inodes (pagi_freecount is
+                * non-zero) ineed is 0 and the ag is accepted below without
+                * looking at its free space at all.  Otherwise the ag must
+                * have at least needspace + ineed free blocks, a longest
+                * free extent of at least ineed blocks, and okalloc must
+                * be set.  If pagf_longest is 0, longest falls back to 1
+                * when the ag freelist is non-empty and to 0 otherwise.
+                */
+               ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
+               if (ineed && !pag->pagf_init) {
+                       if (agbp == NULL &&
+                           xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+                               agbp = NULL;
+                               mraccunlock(&mp->m_peraglock);
+                               goto nextag;
+                       }
+                       (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
+               }
+               if (!ineed || pag->pagf_init) {
+                       if (ineed && !(longest = pag->pagf_longest))
+                               longest = pag->pagf_flcount > 0;
+                       if (!ineed ||
+                           (pag->pagf_freeblks >= needspace + ineed &&
+                            longest >= ineed &&
+                            okalloc)) {
+                               if (agbp == NULL &&
+                                   xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+                                       agbp = NULL;
+                                       mraccunlock(&mp->m_peraglock);
+                                       goto nextag;
+                               }
+                               mraccunlock(&mp->m_peraglock);
+                               return agbp;
+                       }
+               }
+               mraccunlock(&mp->m_peraglock);
+               if (agbp)
+                       xfs_trans_brelse(tp, agbp);
+nextag:                
+               /*
+                * No point in iterating over the rest, if we're shutting
+                * down.
+                *
+                * Otherwise step to the next ag, wrapping to 0 after the
+                * last one.  On arriving back at the starting ag, give up
+                * if this was already the pass without flags; if not,
+                * clear flags and go around once more.
+                */
+               if (XFS_FORCED_SHUTDOWN(mp))
+                       return (xfs_buf_t *)0;
+               agno++;
+               if (agno == agcount)
+                       agno = 0;
+               if (agno == pagno) {
+                       if (flags == 0)
+                               return (xfs_buf_t *)0;
+                       flags = 0;
+               }
+       }
+}
+
+/* 
+ * Visible inode allocation functions.
+ */
+
+/*
+ * Allocate an inode on disk.
+ * Mode is used to tell whether the new inode will need space, and whether
+ * it is a directory.
+ *
+ * The arguments IO_agbp and alloc_done are defined to work within
+ * the constraint of one allocation per transaction.
+ * xfs_dialloc() is designed to be called twice if it has to do an
+ * allocation to make more free inodes.  On the first call,
+ * IO_agbp should be set to NULL. If an inode is available,
+ * i.e., xfs_dialloc() did not need to do an allocation, an inode
+ * number is returned.  In this case, IO_agbp would be set to the
+ * current ag_buf and alloc_done set to false.
+ * If an allocation needed to be done, xfs_dialloc would return
+ * the current ag_buf in IO_agbp and set alloc_done to true.
+ * The caller should then commit the current transaction, allocate a new
+ * transaction, and call xfs_dialloc() again, passing in the previous
+ * value of IO_agbp.  IO_agbp should be held across the transactions.
+ * Since the agbp is locked across the two calls, the second call is
+ * guaranteed to have a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the
+ * on-disk data structures are updated.  The inode itself is not read
+ * in, since doing so would break ordering constraints with xfs_reclaim.
+ *
+ * Return value:
+ *   0 with *inop set to the new inode number when an inode was picked.
+ *   0 with *inop == NULLFSINO when no inode was handed out by this call:
+ *     either no suitable ag was found, the chunk allocation reported
+ *     ENOSPC, or a new chunk was just allocated (*alloc_done is then
+ *     B_TRUE and the caller is expected to call again).
+ *   ENOSPC with *inop == NULLFSINO when the m_maxicount ceiling stopped
+ *     us from allocating more inodes and no ag had a free one.
+ *   Any other non-zero value is an error passed up from a lower-level
+ *     routine.
+ */
+int
+xfs_dialloc(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_ino_t       parent,         /* parent inode (directory) */
+       mode_t          mode,           /* mode bits for new inode */
+       int             okalloc,        /* ok to allocate more space */
+       xfs_buf_t       **IO_agbp,      /* in/out ag header's buffer */
+       boolean_t       *alloc_done,    /* true if we needed to replenish
+                                          inode freelist */
+       xfs_ino_t       *inop)          /* inode number allocated */
+{
+       xfs_agnumber_t  agcount;        /* number of allocation groups */
+       xfs_buf_t       *agbp;          /* allocation group header's buffer */
+       xfs_agnumber_t  agno;           /* allocation group number */
+       xfs_agi_t       *agi;           /* allocation group header structure */
+       xfs_btree_cur_t *cur;           /* inode allocation btree cursor */
+       int             error;          /* error return value */
+       int             i;              /* result code */
+       int             ialloced;       /* inode allocation status */
+       int             noroom = 0;     /* no space for inode blk allocation */
+       xfs_ino_t       ino;            /* fs-relative inode to be returned */
+       /* REFERENCED */
+       int             j;              /* result code */
+       xfs_mount_t     *mp;            /* file system mount structure */
+       int             offset;         /* index of inode in chunk */
+       xfs_agino_t     pagino;         /* parent's a.g. relative inode # */
+       xfs_agnumber_t  pagno;          /* parent's allocation group number */
+       xfs_inobt_rec_t rec;            /* inode allocation record */
+       xfs_agnumber_t  tagno;          /* testing allocation group number */
+       xfs_btree_cur_t *tcur;          /* temp cursor */
+       xfs_inobt_rec_t trec;           /* temp inode allocation record */
+
+
+       if (*IO_agbp == NULL) {
+               /*
+                * We do not have an agbp, so select an initial allocation
+                * group for inode allocation.
+                */
+               agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+               /*
+                * Couldn't find an allocation group satisfying the
+                * criteria, give up.
+                */
+               if (!agbp) {
+                       *inop = NULLFSINO;
+                       return 0;
+               }
+               agi = XFS_BUF_TO_AGI(agbp);
+               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+       } else {
+               /*
+                * Continue where we left off before.  In this case, we
+                * know that the allocation group has free inodes.
+                */
+               agbp = *IO_agbp;
+               agi = XFS_BUF_TO_AGI(agbp);
+               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+               ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+       }
+       mp = tp->t_mountp;
+       agcount = mp->m_sb.sb_agcount;
+       agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+       tagno = agno;
+       pagno = XFS_INO_TO_AGNO(mp, parent);
+       pagino = XFS_INO_TO_AGINO(mp, parent);
+
+       /*
+        * If we have already hit the ceiling of inode blocks then clear
+        * okalloc so we scan all available agi structures for a free
+        * inode.
+        *
+        * noroom is remembered so that, if the scan below finds nothing,
+        * ENOSPC is returned instead of 0.
+        */
+
+       if (mp->m_maxicount &&
+           mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+               noroom = 1;
+               okalloc = 0;
+       }
+
+       /*
+        * Loop until we find an allocation group that either has free inodes
+        * or in which we can allocate some inodes.  Iterate through the
+        * allocation groups upward, wrapping at the end.
+        * The search gives up once tagno wraps back around to the ag we
+        * started in (agno).
+        */
+       *alloc_done = B_FALSE;
+       while (INT_GET(agi->agi_freecount, ARCH_CONVERT) == 0) {
+               /*
+                * Don't do anything if we're not supposed to allocate
+                * any blocks, just go on to the next ag.
+                */
+               if (okalloc) {
+                       /*
+                        * Try to allocate some new inodes in the allocation
+                        * group.
+                        */
+                       if (error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced)) {
+                               xfs_trans_brelse(tp, agbp);
+                               if (error == ENOSPC) {
+                                       *inop = NULLFSINO;
+                                       return 0;
+                               } else
+                                       return error;
+                       }
+                       if (ialloced) {
+                               /*
+                                * We successfully allocated some inodes, return
+                                * the current context to the caller so that it
+                                * can commit the current transaction and call
+                                * us again where we left off.
+                                */
+                               ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+                               *alloc_done = B_TRUE;
+                               *IO_agbp = agbp;
+                               *inop = NULLFSINO;
+                               return 0;
+                       }
+               }
+               /*
+                * If it failed, give up on this ag.
+                */
+               xfs_trans_brelse(tp, agbp);
+               /*
+                * Go on to the next ag: get its ag header.
+                * An ag whose header cannot be read is simply skipped.
+                */
+nextag:
+               if (++tagno == agcount)
+                       tagno = 0;
+               if (tagno == agno) {
+                       *inop = NULLFSINO;
+                       return noroom ? ENOSPC : 0;
+               }
+               mraccess(&mp->m_peraglock);
+               error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+               mraccunlock(&mp->m_peraglock);
+               if (error)
+                       goto nextag;
+               agi = XFS_BUF_TO_AGI(agbp);
+               ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+       }
+       /*
+        * Here with an allocation group that has a free inode.
+        * Reset agno since we may have chosen a new ag in the
+        * loop above.
+        */
+       agno = tagno;
+       *IO_agbp = NULL;
+       cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                                   XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+       /*
+        * If pagino is 0 (this is the root inode allocation) use newino.
+        * This must work because we've just allocated some.
+        */
+       if (!pagino)
+               pagino = INT_GET(agi->agi_newino, ARCH_CONVERT);
+#ifdef DEBUG
+       if (cur->bc_nlevels == 1) {
+               int     freecount = 0;
+
+               if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               do {
+                       if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+                                       &rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       freecount += rec.ir_freecount;
+                       if (error = xfs_inobt_increment(cur, 0, &i))
+                               goto error0;
+               } while (i == 1);
+
+               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+                      XFS_FORCED_SHUTDOWN(mp));
+       }
+#endif
+       /*
+        * If in the same a.g. as the parent, try to get near the parent.
+        */
+       if (pagno == agno) {
+               if (error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))
+                       goto error0;
+               if (i != 0 &&
+                   (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+                           &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+                   j == 1 &&
+                   rec.ir_freecount > 0) {
+                       /*
+                        * Found a free inode in the same chunk
+                        * as parent, done.
+                        */
+               }
+               /*
+                * In the same a.g. as parent, but parent's chunk is full.
+                */
+               else {
+                       int     doneleft;       /* done, to the left */
+                       int     doneright;      /* done, to the right */
+
+                       if (error)
+                               goto error0;
+                       ASSERT(i == 1);
+                       ASSERT(j == 1);
+                       /*
+                        * Duplicate the cursor, search left & right
+                        * simultaneously.
+                        */
+                       if (error = xfs_btree_dup_cursor(cur, &tcur))
+                               goto error0;
+                       /*
+                        * Search left with tcur, back up 1 record.
+                        */
+                       if (error = xfs_inobt_decrement(tcur, 0, &i))
+                               goto error1;
+                       doneleft = !i;
+                       if (!doneleft) {
+                               if (error = xfs_inobt_get_rec(tcur,
+                                               &trec.ir_startino,
+                                               &trec.ir_freecount,
+                                               &trec.ir_free, &i, ARCH_NOCONVERT))
+                                       goto error1;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+                       }
+                       /*
+                        * Search right with cur, go forward 1 record.
+                        */
+                       if (error = xfs_inobt_increment(cur, 0, &i))
+                               goto error1;
+                       doneright = !i;
+                       if (!doneright) {
+                               if (error = xfs_inobt_get_rec(cur,
+                                               &rec.ir_startino,
+                                               &rec.ir_freecount,
+                                               &rec.ir_free, &i, ARCH_NOCONVERT))
+                                       goto error1;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+                       }
+                       /*
+                        * Loop until we find the closest inode chunk
+                        * with a free one.
+                        *
+                        * The loop is expected to leave through one of
+                        * the breaks below; the ASSERT after it checks
+                        * that both directions were not exhausted.
+                        * NOTE(review): if both directions do run out,
+                        * tcur is not released and rec holds no usable
+                        * chunk - confirm that cannot happen here.
+                        */
+                       while (!doneleft || !doneright) {
+                               int     useleft;  /* using left inode
+                                                    chunk this time */
+
+                               /*
+                                * Figure out which block is closer,
+                                * if both are valid.
+                                * Distance to the left chunk is taken
+                                * from its last inode, distance to the
+                                * right chunk from its first inode; the
+                                * left one wins only if strictly closer.
+                                */
+                               if (!doneleft && !doneright)
+                                       useleft =
+                                               pagino -
+                                               (trec.ir_startino +
+                                                XFS_INODES_PER_CHUNK - 1) <
+                                                rec.ir_startino - pagino;
+                               else
+                                       useleft = !doneleft;
+                               /*
+                                * If checking the left, does it have
+                                * free inodes?
+                                */
+                               if (useleft && trec.ir_freecount) {
+                                       /*
+                                        * Yes, set it up as the chunk to use.
+                                        */
+                                       rec = trec;
+                                       xfs_btree_del_cursor(cur,
+                                               XFS_BTREE_NOERROR);
+                                       cur = tcur;
+                                       break;
+                               }
+                               /*
+                                * If checking the right, does it have
+                                * free inodes?
+                                */
+                               if (!useleft && rec.ir_freecount) {
+                                       /*
+                                        * Yes, it's already set up.
+                                        */
+                                       xfs_btree_del_cursor(tcur,
+                                               XFS_BTREE_NOERROR);
+                                       break;
+                               }
+                               /*
+                                * If used the left, get another one
+                                * further left.
+                                */
+                               if (useleft) {
+                                       if (error = xfs_inobt_decrement(tcur, 0,
+                                                       &i))
+                                               goto error1;
+                                       doneleft = !i;
+                                       if (!doneleft) {
+                                               if (error = xfs_inobt_get_rec(
+                                                           tcur,
+                                                           &trec.ir_startino,
+                                                           &trec.ir_freecount,
+                                                           &trec.ir_free, &i, ARCH_NOCONVERT))
+                                                       goto error1;
+                                               XFS_WANT_CORRUPTED_GOTO(i == 1,
+                                                       error1);
+                                       }
+                               }
+                               /*
+                                * If used the right, get another one
+                                * further right.
+                                */
+                               else {
+                                       if (error = xfs_inobt_increment(cur, 0,
+                                                       &i))
+                                               goto error1;
+                                       doneright = !i;
+                                       if (!doneright) {
+                                               if (error = xfs_inobt_get_rec(
+                                                           cur,
+                                                           &rec.ir_startino,
+                                                           &rec.ir_freecount,
+                                                           &rec.ir_free, &i, ARCH_NOCONVERT))
+                                                       goto error1;
+                                               XFS_WANT_CORRUPTED_GOTO(i == 1,
+                                                       error1);
+                                       }
+                               }
+                       }
+                       ASSERT(!doneleft || !doneright);
+               }
+       }
+       /*
+        * In a different a.g. from the parent.
+        * See if the most recently allocated block has any free.
+        *
+        * NOTE(review): when agi_newino is NULLAGINO neither branch
+        * runs and rec is not filled in before it is used further
+        * down - confirm that combination cannot occur.
+        */
+       else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) {
+               if (error = xfs_inobt_lookup_eq(cur,
+                               INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i))
+                       goto error0;
+               if (i == 1 &&
+                   (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+                           &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+                   j == 1 &&
+                   rec.ir_freecount > 0) {
+                       /*
+                        * The last chunk allocated in the group still has
+                        * a free inode.
+                        */
+               }
+               /*
+                * None left in the last group, search the whole a.g.
+                * The scan starts at the first record and stops at the
+                * first chunk whose free count is non-zero.
+                */
+               else {
+                       if (error)
+                               goto error0;
+                       if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+                               goto error0;
+                       ASSERT(i == 1);
+                       for (;;) {
+                               if (error = xfs_inobt_get_rec(cur,
+                                               &rec.ir_startino,
+                                               &rec.ir_freecount, &rec.ir_free,
+                                               &i, ARCH_NOCONVERT))
+                                       goto error0;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                               if (rec.ir_freecount > 0)
+                                       break;
+                               if (error = xfs_inobt_increment(cur, 0, &i))
+                                       goto error0;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       }
+               }
+       }
+       offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
+       ASSERT(offset >= 0);
+       ASSERT(offset < XFS_INODES_PER_CHUNK);
+       ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+                                  XFS_INODES_PER_CHUNK) == 0);
+       ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+       XFS_INOBT_CLR_FREE(&rec, offset, ARCH_NOCONVERT);
+       rec.ir_freecount--;
+       if (error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
+                       rec.ir_free))
+               goto error0;
+       INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1);
+       xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+       mraccess(&mp->m_peraglock);
+       mp->m_perag[tagno].pagi_freecount--;
+       mraccunlock(&mp->m_peraglock);
+#ifdef DEBUG
+       if (cur->bc_nlevels == 1) {
+               int     freecount = 0;
+
+               if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+                       goto error0;
+               do {
+                       if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+                                       &rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       freecount += rec.ir_freecount;
+                       if (error = xfs_inobt_increment(cur, 0, &i))
+                               goto error0;
+               } while (i == 1);
+               ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+                      XFS_FORCED_SHUTDOWN(mp));
+       }
+#endif
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+       *inop = ino;
+       return 0;
+error1:
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+error0:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+
+/*
+ * Return the location of the inode in bno/off, for mapping it into a buffer.
+ *
+ * EINVAL is returned if the ag number or ag block decoded from ino is out
+ * of range for this filesystem, if ino does not survive being rebuilt
+ * from its ag number and ag-relative inode number, or if the inode btree
+ * has no record at or below the inode.
+ *
+ * Three cases are handled, in this order:
+ *  - The block size is at least the inode cluster size, or the caller
+ *    did not pass XFS_IMAP_LOOKUP: the inode's own block is returned
+ *    with *len set to 1.
+ *  - *bno is not NULLFSBLOCK on entry: it is taken as the start of the
+ *    inode cluster and only *off and *len are computed; *bno is left
+ *    as it was.
+ *  - Otherwise the start of the inode chunk is found, either from
+ *    m_inoalign_mask or by looking the inode up in the inode btree,
+ *    and the cluster within that chunk is returned in *bno.
+ */
+/*ARGSUSED*/
+int
+xfs_dilocate(
+       xfs_mount_t     *mp,    /* file system mount structure */
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_ino_t       ino,    /* inode to locate */
+       xfs_fsblock_t   *bno,   /* output: block containing inode */
+       int             *len,   /* output: num blocks in inode cluster */
+       int             *off,   /* output: index in block of inode */
+       uint            flags)  /* flags concerning inode lookup */          
+{
+       xfs_agblock_t   agbno;  /* block number of inode in the alloc group */
+       xfs_buf_t       *agbp;  /* agi buffer */
+       xfs_agino_t     agino;  /* inode number within alloc group */
+       xfs_agnumber_t  agno;   /* allocation group number */
+       int             blks_per_cluster; /* num blocks per inode cluster */
+       xfs_agblock_t   chunk_agbno;    /* first block in inode chunk */
+       xfs_agino_t     chunk_agino;    /* first agino in inode chunk */
+       __int32_t       chunk_cnt;      /* count of free inodes in chunk */
+       xfs_inofree_t   chunk_free;     /* mask of free inodes in chunk */
+       xfs_agblock_t   cluster_agbno;  /* first block in inode cluster */
+       xfs_btree_cur_t *cur;   /* inode btree cursor */
+       int             error;  /* error code */
+       int             i;      /* temp state */
+       int             offset; /* index of inode in its buffer */
+       int             offset_agbno;   /* blks from chunk start to inode */
+
+       ASSERT(ino != NULLFSINO);
+       /*
+        * Split up the inode number into its parts.
+        */
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+       if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+           ino != XFS_AGINO_TO_INO(mp, agno, agino))
+               return XFS_ERROR(EINVAL);
+       if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
+           !(flags & XFS_IMAP_LOOKUP)) {
+               offset = XFS_INO_TO_OFFSET(mp, ino);
+               ASSERT(offset < mp->m_sb.sb_inopblock);
+               *bno = XFS_AGB_TO_FSB(mp, agno, agbno);
+               *off = offset;
+               *len = 1;
+               return 0;
+       }
+       blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+       if (*bno != NULLFSBLOCK) {
+               offset = XFS_INO_TO_OFFSET(mp, ino);
+               ASSERT(offset < mp->m_sb.sb_inopblock);
+               cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
+               *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+                       offset;
+               *len = blks_per_cluster;
+               return 0;
+       }
+       if (mp->m_inoalign_mask) {
+               offset_agbno = agbno & mp->m_inoalign_mask;
+               chunk_agbno = agbno - offset_agbno;
+       } else {
+               mraccess(&mp->m_peraglock);
+               error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+               mraccunlock(&mp->m_peraglock);
+               if (error)
+                       return error;
+               cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
+                       (xfs_inode_t *)0, 0);
+               if (error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))
+                       goto error0;
+               if (error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
+                               &chunk_free, &i, ARCH_NOCONVERT))
+                       goto error0;
+               if (i == 0)
+                       error = XFS_ERROR(EINVAL);
+               xfs_trans_brelse(tp, agbp);
+               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);           
+               if (error)
+                       return error;
+               chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
+               offset_agbno = agbno - chunk_agbno;
+       }
+       ASSERT(agbno >= chunk_agbno);
+       cluster_agbno = chunk_agbno +
+               ((offset_agbno / blks_per_cluster) * blks_per_cluster);
+       offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+               XFS_INO_TO_OFFSET(mp, ino);
+       *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
+       *off = offset;
+       *len = blks_per_cluster;
+       return 0;
+error0:
+       xfs_trans_brelse(tp, agbp);
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Work out the greatest height the inode btree can reach and store
+ * the result in mp->m_in_maxlevels.
+ *
+ * The leaf level must be able to hold one record per inode chunk;
+ * each level above it shrinks by the minimum node fan-out until a
+ * single (root) block remains.
+ *
+ * NOTE(review): the minimum record counts are taken from m_alloc_mnr
+ * rather than from an inode-btree specific table - presumably on
+ * purpose, confirm against the mount structure definition.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+       xfs_mount_t     *mp)            /* file system mount structure */
+{
+       int             height;         /* levels counted so far */
+       int             leaf_min;       /* min records in a leaf block */
+       int             node_min;       /* min records in a node block */
+       uint            nblocks;        /* blocks needed at this level */
+       uint            nleafents;      /* most leaf records possible */
+
+       leaf_min = mp->m_alloc_mnr[0];
+       node_min = mp->m_alloc_mnr[1];
+       nleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
+               XFS_INODES_PER_CHUNK_LOG;
+       nblocks = (nleafents + leaf_min - 1) / leaf_min;
+       height = 1;
+       while (nblocks > 1) {
+               nblocks = (nblocks + node_min - 1) / node_min;
+               height++;
+       }
+       mp->m_in_maxlevels = height;
+}
+
+/*
+ * Log specified fields for the ag hdr (inode section)
+ */
+void
+xfs_ialloc_log_agi(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *bp,            /* allocation group header buffer */
+       int             fields)         /* bitmask of fields to log */
+{
+       int                     first;          /* first byte number */
+       int                     last;           /* last byte number */
+       static const short      offsets[] = {   /* field starting offsets */
+                                       /* keep in sync with bit definitions */
+               offsetof(xfs_agi_t, agi_magicnum),
+               offsetof(xfs_agi_t, agi_versionnum),
+               offsetof(xfs_agi_t, agi_seqno),
+               offsetof(xfs_agi_t, agi_length),
+               offsetof(xfs_agi_t, agi_count),
+               offsetof(xfs_agi_t, agi_root),
+               offsetof(xfs_agi_t, agi_level),
+               offsetof(xfs_agi_t, agi_freecount),
+               offsetof(xfs_agi_t, agi_newino),
+               offsetof(xfs_agi_t, agi_dirino),
+               offsetof(xfs_agi_t, agi_unlinked),
+               sizeof(xfs_agi_t)
+       };
+#ifdef DEBUG
+       xfs_agi_t               *agi;   /* allocation group header */
+
+       agi = XFS_BUF_TO_AGI(bp);
+       ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) ==
+               XFS_AGI_MAGIC);
+#endif
+       /*
+        * Compute byte offsets for the first and last fields.
+        */
+       xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
+       /*
+        * Log the allocation group inode header buffer.
+        */
+       xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Read in the allocation group header (inode allocation section)
+ */
+int
+xfs_ialloc_read_agi(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_buf_t       **bpp)          /* allocation group hdr buf */
+{
+       xfs_agi_t       *agi;           /* allocation group header */
+       int             agi_ok;         /* agi is consistent */
+       xfs_buf_t       *bp;            /* allocation group hdr buf */
+    xfs_daddr_t                d;              /* disk block address */
+       int             error;
+#ifdef DEBUG
+       int             i;
+#endif
+       xfs_perag_t     *pag;           /* per allocation group data */
+
+
+       ASSERT(agno != NULLAGNUMBER);
+       d = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR);
+       if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 1, 0, &bp))
+               return error;
+       ASSERT(bp && !XFS_BUF_GETERROR(bp));
+       /*
+        * Validate the magic number of the agi block.
+        */
+       agi = XFS_BUF_TO_AGI(bp);
+       agi_ok =
+               INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
+               XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+       if (XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
+                       XFS_RANDOM_IALLOC_READ_AGI)) {
+               xfs_trans_brelse(tp, bp);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       pag = &mp->m_perag[agno];
+       if (!pag->pagi_init) {
+               pag->pagi_freecount = INT_GET(agi->agi_freecount, ARCH_CONVERT);
+               pag->pagi_init = 1;
+       } else {
+               /*
+                * It's possible for these to be out of sync if
+                * we are in the middle of a forced shutdown.
+                */
+               ASSERT(pag->pagi_freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT)
+                       || XFS_FORCED_SHUTDOWN(mp));
+       }
+#ifdef DEBUG
+       for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+               ASSERT(INT_GET(agi->agi_unlinked[i], ARCH_CONVERT) != 0);
+#endif
+       XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
+       *bpp = bp;
+       return 0;
+}
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
new file mode 100644 (file)
index 0000000..bdf2dae
--- /dev/null
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Inode allocation management for XFS.
+ */
+#include <xfs.h>
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ *
+ * On entry *recp holds the record to insert; its ir_startino is also
+ * used as the key when level > 0, in which case *bnop is the block
+ * pointer stored next to that key.
+ *
+ * *stat is set to 1 if the entry was inserted and to 0 if it was not
+ * (cursor off the left edge, or the block was full and could be
+ * neither shifted nor split).  After a successful insert *bnop is the
+ * number of the block created by a split, or NULLAGBLOCK if no split
+ * happened.  Only when a split happened are *recp and *curp written:
+ * recp->ir_startino receives the key handed back by the split (no
+ * other field of the local record copied out is assigned here) and
+ * *curp the cursor handed back by the split.
+ *
+ * If level >= cur->bc_nlevels a new root is made with
+ * xfs_inobt_newroot() and its status is passed back in *stat.
+ *
+ * The function value is 0 or the error returned by a helper.
+ */
+STATIC int                             /* error */
+xfs_inobt_insrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to insert record at */
+       xfs_agblock_t           *bnop,  /* i/o: block number inserted */
+       xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
+       xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
+       int                     *stat)  /* success/failure */
+{
+       xfs_inobt_block_t       *block; /* btree block record/key lives in */
+       xfs_buf_t               *bp;    /* buffer for block */
+       int                     error;  /* error return value */
+       int                     i;      /* loop index */
+       xfs_inobt_key_t         key;    /* key value being inserted */
+       xfs_inobt_key_t         *kp;    /* pointer to btree keys */
+       xfs_agblock_t           nbno;   /* block number of allocated block */
+       xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
+       xfs_inobt_key_t         nkey;   /* new key value, from split */
+       xfs_inobt_rec_t         nrec;   /* new record value, for caller */
+       int                     optr;   /* old ptr value */
+       xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
+       int                     ptr;    /* index in btree block for this rec */
+       xfs_inobt_rec_t         *rp;    /* pointer to btree records */
+
+       /*
+        * If we made it to the root level, allocate a new root block
+        * and we're done.
+        */
+       if (level >= cur->bc_nlevels) {
+               error = xfs_inobt_newroot(cur, &i);
+               *bnop = NULLAGBLOCK;
+               *stat = i;
+               return error;
+       }
+       /*
+        * Make a key out of the record data to be inserted, and save it.
+        * Also remember the cursor's slot at this level in both ptr
+        * (working value) and optr (used at the end to decide whether
+        * the parents' keys need updating).
+        */
+       key.ir_startino = recp->ir_startino; /* INT_: direct copy */
+       optr = ptr = cur->bc_ptrs[level];
+       /*
+        * If we're off the left edge, return failure.
+        */
+       if (ptr == 0) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Get pointers to the btree buffer and block.
+        */
+       bp = cur->bc_bufs[level];
+       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level, bp))
+               return error;
+       /* 
+        * Check that the new entry is being inserted in the right place.
+        */
+       if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               if (level == 0) {
+                       rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
+                       xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+               } else {
+                       kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
+                       xfs_btree_check_key(cur->bc_btnum, &key, kp);
+               }
+       }
+#endif
+       nbno = NULLAGBLOCK;
+       ncur = (xfs_btree_cur_t *)0;
+       /*
+        * If the block is full, we can't insert the new entry until we
+        * make the block un-full.
+        */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+               /*
+                * First, try shifting an entry to the right neighbor.
+                */
+               if (error = xfs_inobt_rshift(cur, level, &i))
+                       return error;
+               if (i) {
+                       /* nothing */
+               }
+               /*
+                * Next, try shifting an entry to the left neighbor.
+                * If that works the cursor slot is re-read, and optr
+                * is refreshed along with ptr.
+                */
+               else {
+                       if (error = xfs_inobt_lshift(cur, level, &i))
+                               return error;
+                       if (i) {
+                               optr = ptr = cur->bc_ptrs[level];
+                       } else {
+                               /*
+                                * Next, try splitting the current block
+                                * in half. If this works we have to
+                                * re-set our variables because
+                                * we could be in a different block now.
+                                * Note that optr is deliberately not
+                                * refreshed on this path, only ptr.
+                                */
+                               if (error = xfs_inobt_split(cur, level, &nbno,
+                                               &nkey, &ncur, &i))
+                                       return error;
+                               if (i) {
+                                       bp = cur->bc_bufs[level];
+                                       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+                                       if (error = xfs_btree_check_sblock(cur,
+                                                       block, level, bp))
+                                               return error;
+#endif
+                                       ptr = cur->bc_ptrs[level];
+                                       nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
+                               } else {
+                                       /*
+                                        * Otherwise the insert fails.
+                                        */
+                                       *stat = 0;
+                                       return 0;
+                               }
+                       }
+               }
+       }
+       /*
+        * At this point we know there's room for our new entry in the block
+        * we're pointing at.
+        */
+       if (level > 0) {
+               /*
+                * It's a non-leaf entry.  Make a hole for the new data
+                * in the key and ptr regions of the block.
+                * kp and pp address entry 1, so slot "ptr" (1-based)
+                * is array element ptr - 1; the copies below move
+                * entries ptr..numrecs up by one (overlapping copy).
+                */
+               kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
+               pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               ovbcopy(&kp[ptr - 1], &kp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
+               ovbcopy(&pp[ptr - 1], &pp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+               /*
+                * Now stuff the new data in, bump numrecs and log the new data.
+                * Everything from slot ptr to the new numrecs moved or
+                * changed, so that whole range is logged.
+                */
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, *bnop, level))
+                       return error;
+#endif
+               kp[ptr - 1] = key; /* INT_: struct copy */
+               INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+               xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+       } else {
+               /*
+                * It's a leaf entry.  Make a hole for the new record.
+                */
+               rp = XFS_INOBT_REC_ADDR(block, 1, cur);
+               ovbcopy(&rp[ptr - 1], &rp[ptr],
+                       (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+               /*
+                * Now stuff the new record in, bump numrecs
+                * and log the new data.
+                */
+               rp[ptr - 1] = *recp; /* INT_: struct copy */
+               INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+               xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+       }
+       /*
+        * Log the new number of records in the btree header.
+        */
+       xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+       /*
+        * Check that the key/record is in the right place, now.
+        */
+       if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               if (level == 0)
+                       xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
+                               rp + ptr);
+               else
+                       xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
+                               kp + ptr);
+       }
+#endif
+       /*
+        * If we inserted at the start of a block, update the parents' keys.
+        */
+       if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
+               return error;
+       /*
+        * Return the new block number, if any.
+        * If there is one, give back a record value and a cursor too.
+        */
+       *bnop = nbno;
+       if (nbno != NULLAGBLOCK) {
+               *recp = nrec; /* INT_: struct copy */
+               *curp = ncur;
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Log the btree block header fields selected by "fields" (a mask of
+ * XFS_BB_... bits).  xfs_btree_offsets() converts the mask into a
+ * first/last byte range, which is then logged in transaction tp.
+ */
+STATIC void
+xfs_inobt_log_block(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     fields) /* mask of fields: XFS_BB_... */
+{
+       /*
+        * Starting byte offset of each header field, terminated by
+        * the size of the header structure.
+        */
+       static const short      hdr_offs[] = {
+               offsetof(xfs_inobt_block_t, bb_magic),
+               offsetof(xfs_inobt_block_t, bb_level),
+               offsetof(xfs_inobt_block_t, bb_numrecs),
+               offsetof(xfs_inobt_block_t, bb_leftsib),
+               offsetof(xfs_inobt_block_t, bb_rightsib),
+               sizeof(xfs_inobt_block_t)
+       };
+       int                     lo;     /* first byte offset logged */
+       int                     hi;     /* last byte offset logged */
+
+       xfs_btree_offsets(fields, hdr_offs, XFS_BB_NUM_BITS, &lo, &hi);
+       xfs_trans_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log keys kfirst through klast (1-based, inclusive) of a nonleaf
+ * btree block.  The byte range runs from the start of key kfirst to
+ * the last byte of key klast, measured from the start of the block.
+ */
+STATIC void
+xfs_inobt_log_keys(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     kfirst, /* index of first key to log */
+       int                     klast)  /* index of last key to log */
+{
+       xfs_caddr_t             base;   /* start of the btree block */
+       xfs_inobt_block_t       *blk;   /* btree block to log from */
+       int                     end_off;/* last byte offset logged */
+       xfs_inobt_key_t         *keys;  /* key 1 of the block */
+       int                     start_off;/* first byte offset logged */
+
+       blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+       base = (xfs_caddr_t)blk;
+       keys = XFS_INOBT_KEY_ADDR(blk, 1, cur);
+       start_off = (int)((xfs_caddr_t)(keys + (kfirst - 1)) - base);
+       end_off = (int)(((xfs_caddr_t)(keys + klast) - 1) - base);
+       xfs_trans_log_buf(cur->bc_tp, bp, start_off, end_off);
+}
+
+/*
+ * Log block pointers pfirst through plast (1-based, inclusive) of a
+ * nonleaf btree block.  The byte range runs from the start of pointer
+ * pfirst to the last byte of pointer plast, measured from the start
+ * of the block.
+ */
+STATIC void
+xfs_inobt_log_ptrs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     pfirst, /* index of first pointer to log */
+       int                     plast)  /* index of last pointer to log */
+{
+       xfs_caddr_t             base;   /* start of the btree block */
+       xfs_inobt_block_t       *blk;   /* btree block to log from */
+       int                     end_off;/* last byte offset logged */
+       xfs_inobt_ptr_t         *ptrs;  /* pointer 1 of the block */
+       int                     start_off;/* first byte offset logged */
+
+       blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+       base = (xfs_caddr_t)blk;
+       ptrs = XFS_INOBT_PTR_ADDR(blk, 1, cur);
+       start_off = (int)((xfs_caddr_t)(ptrs + (pfirst - 1)) - base);
+       end_off = (int)(((xfs_caddr_t)(ptrs + plast) - 1) - base);
+       xfs_trans_log_buf(cur->bc_tp, bp, start_off, end_off);
+}
+
+/*
+ * Log records rfirst through rlast (1-based, inclusive) of a leaf
+ * btree block.  The byte range runs from the start of record rfirst
+ * to the last byte of record rlast, measured from the start of the
+ * block.
+ */
+STATIC void
+xfs_inobt_log_recs(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_buf_t               *bp,    /* buffer containing btree block */
+       int                     rfirst, /* index of first record to log */
+       int                     rlast)  /* index of last record to log */
+{
+       xfs_caddr_t             base;   /* start of the btree block */
+       xfs_inobt_block_t       *blk;   /* btree block to log from */
+       int                     end_off;/* last byte offset logged */
+       xfs_inobt_rec_t         *recs;  /* record 1 of the block */
+       int                     start_off;/* first byte offset logged */
+
+       blk = XFS_BUF_TO_INOBT_BLOCK(bp);
+       base = (xfs_caddr_t)blk;
+       recs = XFS_INOBT_REC_ADDR(blk, 1, cur);
+       start_off = (int)((xfs_caddr_t)(recs + (rfirst - 1)) - base);
+       end_off = (int)(((xfs_caddr_t)(recs + rlast) - 1) - base);
+       xfs_trans_log_buf(cur->bc_tp, bp, start_off, end_off);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ * Return 0 if can't find any such record, 1 for success.
+ */
+STATIC int                             /* error */
+xfs_inobt_lookup(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_lookup_t            dir,    /* <=, ==, or >= */
+       int                     *stat)  /* success/failure */
+{
+       xfs_agblock_t           agbno;  /* a.g. relative btree block number */
+       xfs_agnumber_t          agno;   /* allocation group number */
+       xfs_inobt_block_t       *block; /* current btree block */
+       int                     diff;   /* difference for the current key */
+       int                     error;  /* error return value */
+       int                     keyno;  /* current key number */
+       int                     level;  /* level in the btree */
+       xfs_mount_t             *mp;    /* file system mount point */
+
+       /*
+        * Get the allocation group header, and the root block number.
+        */
+       mp = cur->bc_mp;
+       {
+               xfs_agi_t       *agi;   /* a.g. inode header */
+
+               agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+               agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+               agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+       }
+       /*
+        * Iterate over each level in the btree, starting at the root.
+        * For each level above the leaves, find the key we need, based
+        * on the lookup record, then follow the corresponding block
+        * pointer down to the next level.
+        */
+       for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+               xfs_buf_t       *bp;    /* buffer pointer for btree block */
+               xfs_daddr_t             d;      /* disk address of btree block */
+
+               /*
+                * Get the disk address we're looking for.
+                */
+               d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+               /*
+                * If the old buffer at this level is for a different block,
+                * throw it away, otherwise just use it.
+                */
+               bp = cur->bc_bufs[level];
+               if (bp && XFS_BUF_ADDR(bp) != d)
+                       bp = (xfs_buf_t *)0;
+               if (!bp) {
+                       /*
+                        * Need to get a new buffer.  Read it, then 
+                        * set it in the cursor, releasing the old one.
+                        */
+                       if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+                                       agno, agbno, 0, &bp, XFS_INO_BTREE_REF))
+                               return error;
+                       xfs_btree_setbuf(cur, level, bp);
+                       /*
+                        * Point to the btree block, now that we have the buffer
+                        */
+                       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+                       if (error = xfs_btree_check_sblock(cur, block, level,
+                                       bp))
+                               return error;
+               } else
+                       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+               /*
+                * If we already had a key match at a higher level, we know
+                * we need to use the first entry in this block.
+                */
+               if (diff == 0)
+                       keyno = 1;
+               /*
+                * Otherwise we need to search this block.  Do a binary search.
+                */
+               else {
+                       int             high;   /* high entry number */
+                       xfs_inobt_key_t *kkbase;/* base of keys in block */
+                       xfs_inobt_rec_t *krbase;/* base of records in block */
+                       int             low;    /* low entry number */
+
+                       /*
+                        * Get a pointer to keys or records.
+                        */
+                       if (level > 0)
+                               kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
+                       else
+                               krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
+                       /*
+                        * Set low and high entry numbers, 1-based.
+                        */
+                       low = 1;
+                       if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+                               /*
+                                * If the block is empty, the tree must
+                                * be an empty leaf.
+                                */
+                               ASSERT(level == 0 && cur->bc_nlevels == 1);
+                               cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+                               *stat = 0;
+                               return 0;
+                       }
+                       /*
+                        * Binary search the block.
+                        */
+                       while (low <= high) {
+                               xfs_agino_t     startino;       /* key value */
+
+                               /*
+                                * keyno is average of low and high.
+                                */
+                               keyno = (low + high) >> 1;
+                               /*
+                                * Get startino.
+                                */
+                               if (level > 0) {
+                                       xfs_inobt_key_t *kkp;
+
+                                       kkp = kkbase + keyno - 1;
+                                       startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
+                               } else {
+                                       xfs_inobt_rec_t *krp;
+
+                                       krp = krbase + keyno - 1;
+                                       startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
+                               }
+                               /*
+                                * Compute difference to get next direction.
+                                */
+                               diff = (int)startino - cur->bc_rec.i.ir_startino;
+                               /*
+                                * Less than, move right.
+                                */
+                               if (diff < 0)
+                                       low = keyno + 1;
+                               /*
+                                * Greater than, move left.
+                                */
+                               else if (diff > 0)
+                                       high = keyno - 1;
+                               /*
+                                * Equal, we're done.
+                                */
+                               else
+                                       break;
+                       }
+               }
+               /*
+                * If there are more levels, set up for the next level
+                * by getting the block number and filling in the cursor.
+                */
+               if (level > 0) {
+                       /*
+                        * If we moved left, need the previous key number,
+                        * unless there isn't one.
+                        */
+                       if (diff > 0 && --keyno < 1)
+                               keyno = 1;
+                       agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+#ifdef DEBUG
+                       if (error = xfs_btree_check_sptr(cur, agbno, level))
+                               return error;
+#endif
+                       cur->bc_ptrs[level] = keyno;
+               }
+       }
+       /*
+        * Done with the search.
+        * See if we need to adjust the results.
+        */
+       if (dir != XFS_LOOKUP_LE && diff < 0) {
+               keyno++;
+               /*
+                * If ge search and we went off the end of the block, but it's
+                * not the last block, we're in the wrong block.
+                */
+               if (dir == XFS_LOOKUP_GE &&
+                   keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+                   INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+                       int     i;
+
+                       cur->bc_ptrs[0] = keyno;
+                       if (error = xfs_inobt_increment(cur, 0, &i))
+                               return error;
+                       ASSERT(i == 1);
+                       *stat = 1;
+                       return 0;
+               }
+       }
+       else if (dir == XFS_LOOKUP_LE && diff > 0)
+               keyno--;
+       cur->bc_ptrs[0] = keyno;
+       /*
+        * Return if we succeeded or not.
+        */
+       if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+               *stat = 0;
+       else
+               *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+       return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int                             /* error */
+xfs_inobt_lshift(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to shift record on */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+#ifdef DEBUG
+       int                     i;      /* loop index */
+#endif
+       xfs_inobt_key_t         key;    /* key value for leaf level upward */
+       xfs_buf_t               *lbp;   /* buffer for left neighbor block */
+       xfs_inobt_block_t       *left;  /* left neighbor btree block */
+       xfs_inobt_key_t         *lkp;   /* key pointer for left block */
+       xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
+       xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
+       int                     nrec;   /* new number of left block entries */
+       xfs_buf_t               *rbp;   /* buffer for right (current) block */
+       xfs_inobt_block_t       *right; /* right (current) btree block */
+       xfs_inobt_key_t         *rkp;   /* key pointer for right block */
+       xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
+       xfs_inobt_rec_t         *rrp;   /* record pointer for right block */
+
+       /*
+        * Set up variables for this block as "right".
+        */
+       rbp = cur->bc_bufs[level];
+       right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+               return error;
+#endif
+       /*
+        * If we've got no left sibling then we can't shift an entry left.
+        */
+       if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * If the cursor entry is the one that would be moved, don't 
+        * do it... it's too complicated.
+        */
+       if (cur->bc_ptrs[level] <= 1) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Set up the left neighbor as "left".
+        */
+       if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+                       XFS_INO_BTREE_REF))
+               return error;
+       left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+       /*
+        * If it's full, it can't take another entry.
+        */
+       if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+               *stat = 0;
+               return 0;
+       }
+       nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+       /*
+        * If non-leaf, copy a key and a ptr to the left block.
+        */
+       if (level > 0) {
+               lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
+               rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+               *lkp = *rkp;
+               xfs_inobt_log_keys(cur, lbp, nrec, nrec);
+               lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
+               rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+                       return error;
+#endif
+               *lpp = *rpp; /* INT_: no-change copy */
+               xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
+       }
+       /*
+        * If leaf, copy a record to the left block.
+        */
+       else {
+               lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
+               rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+               *lrp = *rrp;
+               xfs_inobt_log_recs(cur, lbp, nrec, nrec);
+       }
+       /*
+        * Bump and log left's numrecs, decrement and log right's numrecs.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+       xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+       if (level > 0)
+               xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
+       else
+               xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
+#endif
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+       /*
+        * Slide the contents of right down one entry.
+        */
+       if (level > 0) {
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+                                       level))
+                               return error;
+               }
+#endif
+               ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+               xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+       } else {
+               ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+               rkp = &key;
+       }
+       /*
+        * Update the parent key values of right.
+        */
+       if (error = xfs_inobt_updkey(cur, rkp, level + 1))
+               return error;
+       /*
+        * Slide the cursor value left one.
+        */
+       cur->bc_ptrs[level]--;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ *
+ * One block is allocated near the current root recorded in the AGI, the
+ * AGI's root/level fields are pointed at it and logged, and the new block
+ * is filled in with two entries: the cursor's block at the old top level
+ * and that block's sibling.  The cursor gains one level referring to the
+ * new root.
+ *
+ * Returns 0, or the error from the allocator, a buffer read or a block
+ * check.  *stat is set to 1 on success and to 0 when no block could be
+ * allocated; it is not written on the error returns.
+ */
+STATIC int                             /* error */
+xfs_inobt_newroot(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     *stat)  /* success/failure */
+{
+       xfs_agi_t               *agi;   /* a.g. inode header */
+       xfs_alloc_arg_t         args;   /* allocation argument structure */
+       xfs_inobt_block_t       *block; /* one half of the old root block */
+       xfs_buf_t               *bp;    /* buffer containing block */
+       int                     error;  /* error return value */
+       xfs_inobt_key_t         *kp;    /* btree key pointer */
+       xfs_agblock_t           lbno;   /* left block number */
+       xfs_buf_t               *lbp;   /* left buffer pointer */
+       xfs_inobt_block_t       *left;  /* left btree block */
+       xfs_buf_t               *nbp;   /* new (root) buffer */
+       xfs_inobt_block_t       *new;   /* new (root) btree block */
+       int                     nptr;   /* new value for key index, 1 or 2 */
+       xfs_inobt_ptr_t         *pp;    /* btree address pointer */
+       xfs_agblock_t           rbno;   /* right block number */
+       xfs_buf_t               *rbp;   /* right buffer pointer */
+       xfs_inobt_block_t       *right; /* right btree block */
+       xfs_inobt_rec_t         *rp;    /* btree record pointer */
+
+       ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
+
+       /*
+        * Get a block & a buffer.
+        * The request is for exactly one block (minlen == maxlen == 1),
+        * allocation type XFS_ALLOCTYPE_NEAR_BNO, targeted at the current
+        * root block number read from the AGI.
+        */
+       agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+       args.tp = cur->bc_tp;
+       args.mp = cur->bc_mp;
+       args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
+               INT_GET(agi->agi_root, ARCH_CONVERT));
+       args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+               args.isfl = args.userdata = args.minalignslop = 0;
+       args.minlen = args.maxlen = args.prod = 1;
+       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       if (error = xfs_alloc_vextent(&args))
+               return error;
+       /*
+        * None available, we fail.
+        * This is reported through *stat = 0 with a zero (no error) return.
+        */
+       if (args.fsbno == NULLFSBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+       new = XFS_BUF_TO_INOBT_BLOCK(nbp);
+       /*
+        * Set the root data in the a.g. inode structure.
+        * The AGI root and level are changed and logged here, before the
+        * contents of the new block are filled in below.
+        */
+       INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
+       INT_MOD(agi->agi_level, ARCH_CONVERT, 1);
+       xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
+               XFS_AGI_ROOT | XFS_AGI_LEVEL);
+       /*
+        * At the previous root level there are now two blocks: the old
+        * root, and the new block generated when it was split.
+        * We don't know which one the cursor is pointing at, so we
+        * set up variables "left" and "right" for each case.
+        * A non-null right sibling identifies the cursor's block as the
+        * left one.  nptr records which entry of the new root (1 = left,
+        * 2 = right) leads back to the cursor's block.
+        */
+       bp = cur->bc_bufs[cur->bc_nlevels - 1];
+       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp))
+               return error;
+#endif
+       if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+               /*
+                * Our block is left, pick up the right block.
+                * The sibling is read and checked in all builds, not
+                * only under DEBUG.
+                */
+               lbp = bp;
+               lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+               left = block;
+               rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+                               rbno, 0, &rbp, XFS_INO_BTREE_REF))
+                       return error;
+               bp = rbp;
+               right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+               if (error = xfs_btree_check_sblock(cur, right,
+                               cur->bc_nlevels - 1, rbp))
+                       return error;
+               nptr = 1;
+       } else {
+               /*
+                * Our block is right, pick up the left block.
+                * The sibling is read and checked in all builds, not
+                * only under DEBUG.
+                */
+               rbp = bp;
+               rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
+               right = block;
+               lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+                               lbno, 0, &lbp, XFS_INO_BTREE_REF))
+                       return error;
+               bp = lbp;
+               left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+               if (error = xfs_btree_check_sblock(cur, left,
+                               cur->bc_nlevels - 1, lbp))
+                       return error;
+               nptr = 2;
+       }
+       /*
+        * Fill in the new block's btree header and log it.
+        * The new root's level is the old level count (bc_nlevels), it
+        * holds two entries, and both sibling pointers are null.
+        */
+       INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+       INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+       INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+       INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+        INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+       xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
+       ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+       /*
+        * Fill in the key data in the new root.
+        * If the two children are interior blocks (level > 0), their first
+        * keys are copied.  If they are leaves, each key is built from the
+        * ir_startino of the child's first record.
+        */
+       kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
+       if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+               kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
+               kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
+       } else {
+               rp = XFS_INOBT_REC_ADDR(left, 1, cur);
+               INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
+               rp = XFS_INOBT_REC_ADDR(right, 1, cur);
+               INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
+       }
+       xfs_inobt_log_keys(cur, nbp, 1, 2);
+       /*
+        * Fill in the pointer data in the new root.
+        * Entry 1 points at the left block, entry 2 at the right block.
+        */
+       pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
+       INT_SET(pp[0], ARCH_CONVERT, lbno);
+       INT_SET(pp[1], ARCH_CONVERT, rbno);
+       xfs_inobt_log_ptrs(cur, nbp, 1, 2);
+       /*
+        * Fix up the cursor.
+        * The new root buffer is installed at index bc_nlevels (the old
+        * level count), its pointer is set to nptr, and only then is
+        * bc_nlevels incremented.
+        */
+       xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+       cur->bc_ptrs[cur->bc_nlevels] = nptr;
+       cur->bc_nlevels++;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * The cursor's block at "level" is treated as the left block; its last
+ * entry is moved into slot 1 of its right sibling and the parent key for
+ * the right sibling is updated through a duplicate cursor.
+ *
+ * Nothing is moved (*stat = 0, return 0) when there is no right sibling,
+ * when the cursor index is at or past the left block's last entry, or when
+ * the right sibling is already full.  After a successful shift *stat = 1.
+ * Returns 0 or an error code; *stat is not written on the error returns.
+ *
+ * NOTE(review): no field of cur is written in this function; the
+ * "update cur" sentence above may be a leftover -- confirm.
+ */
+STATIC int                             /* error */
+xfs_inobt_rshift(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to shift record on */
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+       int                     i;      /* loop index */
+       xfs_inobt_key_t         key;    /* key value for leaf level upward */
+       xfs_buf_t               *lbp;   /* buffer for left (current) block */
+       xfs_inobt_block_t       *left;  /* left (current) btree block */
+       xfs_inobt_key_t         *lkp;   /* key pointer for left block */
+       xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
+       xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
+       xfs_buf_t               *rbp;   /* buffer for right neighbor block */
+       xfs_inobt_block_t       *right; /* right neighbor btree block */
+       xfs_inobt_key_t         *rkp;   /* key pointer for right block */
+       xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
+       xfs_inobt_rec_t         *rrp;   /* record pointer for right block */
+       xfs_btree_cur_t         *tcur;  /* temporary cursor */
+
+       /*
+        * Set up variables for this block as "left".
+        */
+       lbp = cur->bc_bufs[level];
+       left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+#endif
+       /*
+        * If we've got no right sibling then we can't shift an entry right.
+        */
+       if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * If the cursor entry is the one that would be moved, don't
+        * do it... it's too complicated.
+        * The entry that would move is left's last one, so give up when
+        * the cursor index is at or past left's numrecs.
+        */
+       if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Set up the right neighbor as "right".
+        * The sibling is read and checked in all builds, not only DEBUG.
+        */
+       if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+                       XFS_INO_BTREE_REF))
+               return error;
+       right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+       if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+               return error;
+       /*
+        * If it's full, it can't take another entry.
+        */
+       if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Make a hole at the start of the right neighbor block, then
+        * copy the last left block entry to the hole.
+        * The existing right entries are slid up one slot with ovbcopy
+        * (source and destination ranges overlap), then slot 1 is
+        * overwritten.  right's numrecs has not been bumped yet at this
+        * point, hence the "+ 1" in the logged ranges.
+        * For a leaf, rkp is pointed at the local "key", built from the
+        * moved record's ir_startino, so the parent-key update further
+        * down can use rkp in both cases.
+        */
+       if (level > 0) {
+               lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+               rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+               if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+                       return error;
+#endif
+               *rkp = *lkp; /* INT_: no change copy */
+               *rpp = *lpp; /* INT_: no change copy */
+               xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+       } else {
+               lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+               rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+               ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               *rrp = *lrp;
+               xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+               key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+               rkp = &key;
+       }
+       /*
+        * Decrement and log left's numrecs, bump and log right's numrecs.
+        * Under DEBUG the ordering of the new first entry of right against
+        * the entry after it is checked before right's header is logged.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+       xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+       INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+#ifdef DEBUG
+       if (level > 0)
+               xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+       else
+               xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+#endif
+       xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+       /*
+        * Using a temporary cursor, update the parent key values of the
+        * block on the right.
+        * tcur is positioned with xfs_btree_lastrec and then incremented
+        * once at this level (presumably landing on the first entry of the
+        * right block -- confirm against xfs_btree_lastrec).  The stat
+        * result of the increment, returned in i, is not examined.
+        * tcur is deleted on both the error and the success path.
+        */
+       if (error = xfs_btree_dup_cursor(cur, &tcur))
+               return error;
+       xfs_btree_lastrec(tcur, level);
+       if ((error = xfs_inobt_increment(tcur, level, &i)) ||
+           (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
+               xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+               return error;
+       }
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ *
+ * A new block is allocated near the cursor's block at "level" and becomes
+ * its right sibling; the upper part of the entries is copied into it and
+ * the sibling links (including the back link of any block further right)
+ * are rewired and logged.
+ *
+ * Outputs: *keyp receives the first key of the new block, *bnop its block
+ * number.  *curp is written only when level is not the top level of the
+ * tree; it then receives a duplicate of cur whose pointer at level + 1 has
+ * been advanced by one.
+ *
+ * Returns 0 or an error code.  *stat is 1 after a split, 0 when no block
+ * could be allocated; it is not written on the error returns.
+ */
+STATIC int                             /* error */
+xfs_inobt_split(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level to split */
+       xfs_agblock_t           *bnop,  /* output: block number allocated */
+       xfs_inobt_key_t         *keyp,  /* output: first key of new block */
+       xfs_btree_cur_t         **curp, /* output: new cursor */
+       int                     *stat)  /* success/failure */
+{
+       xfs_alloc_arg_t         args;   /* allocation argument structure */
+       int                     error;  /* error return value */
+       int                     i;      /* loop index/record number */
+       xfs_agblock_t           lbno;   /* left (current) block number */
+       xfs_buf_t               *lbp;   /* buffer for left block */
+       xfs_inobt_block_t       *left;  /* left (current) btree block */
+       xfs_inobt_key_t         *lkp;   /* left btree key pointer */
+       xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
+       xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
+       xfs_buf_t               *rbp;   /* buffer for right block */
+       xfs_inobt_block_t       *right; /* right (new) btree block */
+       xfs_inobt_key_t         *rkp;   /* right btree key pointer */
+       xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
+       xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
+
+       /*
+        * Set up left block (current one).
+        */
+       lbp = cur->bc_bufs[level];
+       args.tp = cur->bc_tp;
+       args.mp = cur->bc_mp;
+       lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+       /*
+        * Allocate the new block.
+        * If we can't do it, we're toast.  Give up.
+        * Exactly one block is requested (minlen == maxlen == 1) with
+        * allocation type XFS_ALLOCTYPE_NEAR_BNO, targeted at the left
+        * block.  An allocator error is returned as is; an empty result
+        * is reported as *stat = 0 with a zero return.
+        */
+       args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
+       args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+               args.isfl = args.userdata = args.minalignslop = 0;
+       args.minlen = args.maxlen = args.prod = 1;
+       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       if (error = xfs_alloc_vextent(&args))
+               return error;
+       if (args.fsbno == NULLFSBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+       /*
+        * Set up the new block as "right".
+        */
+       right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+       /*
+        * "Left" is the current (according to the cursor) block.
+        */
+       left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+               return error;
+#endif
+       /*
+        * Fill in the btree header for the new block.
+        * It gets the same magic and level as left and starts out with
+        * half (rounded down) of left's entry count.
+        */
+       INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+       right->bb_level = left->bb_level; /* INT_: direct copy */
+       INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+       /*
+        * Make sure that if there's an odd number of entries now, that
+        * each new block will have the same number of entries.
+        * With an odd count, right takes the extra entry when the cursor
+        * index is no greater than right's count + 1 (presumably so the
+        * halves balance once the caller inserts at the cursor position --
+        * confirm against the insert path).
+        * i is then the 1-based index of the first left entry that moves.
+        */
+       if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+           cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+               INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+       i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+       /*
+        * For non-leaf blocks, copy keys and addresses over to the new block.
+        * Under DEBUG, i is reused as a loop index after lkp/lpp have been
+        * derived from it; it is not needed again on this path.
+        * The first key of the new block is handed back through keyp.
+        */
+       if (level > 0) {
+               lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
+               lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
+               rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+               rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+               for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+                       if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+                               return error;
+               }
+#endif
+               bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+               bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+               xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               *keyp = *rkp;
+       }
+       /*
+        * For leaf blocks, copy records over to the new block.
+        * The returned key is built from the ir_startino of the new
+        * block's first record.
+        */
+       else {
+               lrp = XFS_INOBT_REC_ADDR(left, i, cur);
+               rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+               bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+               xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+               keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
+       }
+       /*
+        * Find the left block number by looking in the buffer.
+        * Adjust numrecs, sibling pointers.
+        * (lbno was already derived from lbp at the top of the function;
+        * the first sentence above looks stale -- NOTE(review).)
+        * left's count drops by the number of entries moved, right
+        * inherits left's old right sibling, and the two blocks are linked
+        * to each other.  right is logged in full, left only for the two
+        * header fields that changed.
+        */
+       INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+       right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+       INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
+       INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+       xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
+       xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+       /*
+        * If there's a block to the new block's right, make that block
+        * point back to right instead of to left.
+        * That block is read and checked in all builds, and only its left
+        * sibling field is logged.
+        */
+       if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+               xfs_inobt_block_t       *rrblock;       /* rr btree block */
+               xfs_buf_t               *rrbp;          /* buffer for rrblock */
+
+               if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+                               INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+                               XFS_INO_BTREE_REF))
+                       return error;
+               rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
+               if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+                       return error;
+               INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
+               xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
+       }
+       /*
+        * If the cursor is really in the right block, move it there.
+        * If it's just pointing past the last entry in left, then we'll
+        * insert there, so don't change anything in that case.
+        * When moved, the cursor's buffer at this level becomes rbp and
+        * its index is rebased by subtracting left's new entry count.
+        */
+       if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+               xfs_btree_setbuf(cur, level, rbp);
+               cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * If there are more levels, we'll need another cursor which refers
+        * the right block, no matter where this cursor was.
+        * The duplicate's index at level + 1 is advanced by one.  At the
+        * top level *curp is left untouched.
+        */
+       if (level + 1 < cur->bc_nlevels) {
+               if (error = xfs_btree_dup_cursor(cur, curp))
+                       return error;
+               (*curp)->bc_ptrs[level + 1]++;
+       }
+       *bnop = args.agbno;
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path.
+ *
+ * Starting at "level", the key at the cursor's index in each block on the
+ * path is overwritten with *keyp and logged.  The walk continues upward
+ * only while the key just rewritten was entry 1 of its block.
+ *
+ * Returns 0.  Only in DEBUG builds can it return an error, namely a
+ * failed block check.  If level is already >= bc_nlevels nothing is done.
+ */
+STATIC int                             /* error */
+xfs_inobt_updkey(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_inobt_key_t         *keyp,  /* new key value to update to */
+       int                     level)  /* starting level for update */
+{
+       int                     ptr;    /* index of key in block */
+
+       /*
+        * Go up the tree from this level toward the root.
+        * At each level, update the key value to the value input.
+        * Stop when we reach a level where the cursor isn't pointing
+        * at the first entry in the block.
+        * ptr starts at 1 so that the starting level is always processed;
+        * the key at a level is rewritten before ptr is tested, so the
+        * first level whose index is not 1 is still updated.
+        */
+       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+               xfs_buf_t               *bp;    /* buffer for block */
+               xfs_inobt_block_t       *block; /* btree block */
+#ifdef DEBUG
+               int                     error;  /* error return value */
+#endif
+               xfs_inobt_key_t         *kp;    /* ptr to btree block keys */
+
+               bp = cur->bc_bufs[level];
+               block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, block, level, bp))
+                       return error;
+#endif
+               ptr = cur->bc_ptrs[level];
+               kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
+               *kp = *keyp;
+               xfs_inobt_log_keys(cur, bp, ptr, ptr);
+       }
+       return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * If the decremented index is still inside the current block, that is all
+ * that happens.  Otherwise the cursor moves to the last entry of the left
+ * sibling: indices are decremented up the tree until one stays inside its
+ * block, then the path is re-read downward to "level".
+ *
+ * *stat is 1 when the cursor moved and 0 when the block has no left
+ * sibling; in the latter case the index at "level" has already been
+ * decremented to 0.  Returns 0 or an error from reading/checking a block;
+ * *stat is not written on the error returns.
+ */
+int                                    /* error */
+xfs_inobt_decrement(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat)  /* success/failure */
+{
+       xfs_inobt_block_t       *block; /* btree block */
+       int                     error;
+       int                     lev;    /* btree level */
+
+       ASSERT(level < cur->bc_nlevels);
+       /*
+        * Read-ahead to the left at this level.
+        */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+       /*
+        * Decrement the ptr at this level.  If we're still in the block
+        * then we're done.
+        */
+       if (--cur->bc_ptrs[level] > 0) {
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * Get a pointer to the btree block.
+        */
+       block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level,
+                       cur->bc_bufs[level]))
+               return error;
+#endif
+       /*
+        * If we just went off the left edge of the tree, return failure.
+        * "Failure" here is *stat = 0 with a zero return, not an error.
+        */
+       if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * March up the tree decrementing pointers.
+        * Stop when we don't go off the left edge of a block.
+        * On exit lev is the lowest level whose decremented index is
+        * still positive.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               if (--cur->bc_ptrs[lev] > 0)
+                       break;
+               /*
+                * Read-ahead the left block, we're going to read it
+                * in the next loop.
+                */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+       }
+       /*
+        * If we went off the root then we are seriously confused.
+        */
+       ASSERT(lev < cur->bc_nlevels);
+       /*
+        * Now walk back down the tree, fixing up the cursor's buffer
+        * pointers and key numbers.
+        * At each step the child is read through the pointer at the
+        * parent's cursor index, installed in the cursor with
+        * xfs_btree_setbuf, checked, and the cursor index for that level
+        * is set to the child's last entry (its numrecs).
+        */
+       for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+               xfs_agblock_t   agbno;  /* block number of btree block */
+               xfs_buf_t       *bp;    /* buffer containing btree block */
+
+               agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                               cur->bc_private.i.agno, agbno, 0, &bp,
+                               XFS_INO_BTREE_REF))
+                       return error;
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_INOBT_BLOCK(bp);
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+               cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+       }
+       *stat = 1;
+       return 0;
+}
+
+/* 
+ * Get the data from the pointed-to record.
+ *
+ * Reads the leaf (level 0) record at the cursor's index and stores its
+ * ir_startino, ir_freecount and ir_free fields through ino, fcnt and free.
+ *
+ * *stat is 1 when a record was returned and 0 when the cursor index lies
+ * outside 1..numrecs of the leaf block; in that case the three outputs
+ * are not written.  Returns 0; only DEBUG builds can return an error
+ * (failed block check).
+ *
+ * arch must be ARCH_NOCONVERT or ARCH_CONVERT (asserted).  It selects
+ * between reading the fields with INT_GET and copying them with INT_COPY;
+ * presumably this means host-order versus unconverted on-disk values --
+ * confirm against the INT_ macro definitions.
+ */
+int                                    /* error */
+xfs_inobt_get_rec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_agino_t             *ino,   /* output: starting inode of chunk */
+       __int32_t               *fcnt,  /* output: number of free inodes */
+       xfs_inofree_t           *free,  /* output: free inode mask */
+       int                     *stat,  /* output: success/failure */
+        xfs_arch_t              arch)   /* input: architecture */
+{
+       xfs_inobt_block_t       *block; /* btree block */
+       xfs_buf_t               *bp;    /* buffer containing btree block */
+#ifdef DEBUG
+       int                     error;  /* error return value */
+#endif
+       int                     ptr;    /* record number */
+       xfs_inobt_rec_t         *rec;   /* record data */
+
+       bp = cur->bc_bufs[0];
+       ptr = cur->bc_ptrs[0];
+       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, 0, bp))
+               return error;
+#endif
+       /*
+        * Off the right end or left end, return failure.
+        * "Failure" here is *stat = 0 with a zero return, not an error.
+        */
+       if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * Point to the record and extract its data.
+        * ARCH_NOCONVERT: each field is fetched with
+        * INT_GET(..., ARCH_CONVERT).  Otherwise: each field is
+        * transferred with INT_COPY(..., ARCH_CONVERT).
+        */
+       rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
+        ASSERT(arch == ARCH_NOCONVERT || arch == ARCH_CONVERT);
+        if (arch == ARCH_NOCONVERT) {
+           *ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
+           *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
+           *free = INT_GET(rec->ir_free, ARCH_CONVERT);
+        } else {
+           INT_COPY(*ino, rec->ir_startino, ARCH_CONVERT);
+           INT_COPY(*fcnt, rec->ir_freecount, ARCH_CONVERT);
+           INT_COPY(*free, rec->ir_free, ARCH_CONVERT);
+        }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * If the incremented index is still inside the current block, that is all
+ * that happens.  Otherwise the cursor moves to the first entry of the
+ * right sibling: indices are incremented up the tree until one stays
+ * inside its block, then the path is re-read downward to "level".
+ *
+ * *stat is 1 when the cursor moved and 0 when the block has no right
+ * sibling; in the latter case the index at "level" has already been
+ * incremented past the block's last entry.  Returns 0 or an error from
+ * reading/checking a block; *stat is not written on the error returns.
+ */
+int                                    /* error */
+xfs_inobt_increment(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level,  /* level in btree, 0 is leaf */
+       int                     *stat)  /* success/failure */
+{
+       xfs_inobt_block_t       *block; /* btree block */
+       xfs_buf_t               *bp;    /* buffer containing btree block */
+       int                     error;  /* error return value */
+       int                     lev;    /* btree level */
+
+       ASSERT(level < cur->bc_nlevels);
+       /*
+        * Read-ahead to the right at this level.
+        */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+       /*
+        * Get a pointer to the btree block.
+        */
+       bp = cur->bc_bufs[level];
+       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, level, bp))
+               return error;
+#endif
+       /*
+        * Increment the ptr at this level.  If we're still in the block
+        * then we're done.
+        */
+       if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+               *stat = 1;
+               return 0;
+       }
+       /*
+        * If we just went off the right edge of the tree, return failure.
+        * "Failure" here is *stat = 0 with a zero return, not an error.
+        */
+       if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+               *stat = 0;
+               return 0;
+       }
+       /*
+        * March up the tree incrementing pointers.
+        * Stop when we don't go off the right edge of a block.
+        * On exit lev is the lowest level whose incremented index still
+        * falls within its block's numrecs.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               bp = cur->bc_bufs[lev];
+               block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+#endif
+               if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+                       break;
+               /*
+                * Read-ahead the right block, we're going to read it
+                * in the next loop.
+                */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+       }
+       /*
+        * If we went off the root then we are seriously confused.
+        */
+       ASSERT(lev < cur->bc_nlevels);
+       /*
+        * Now walk back down the tree, fixing up the cursor's buffer
+        * pointers and key numbers.
+        * At each step the child is read through the pointer at the
+        * parent's cursor index, installed in the cursor with
+        * xfs_btree_setbuf, checked, and the cursor index for that level
+        * is set to the child's first entry.
+        */
+       for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
+            lev > level; ) {
+               xfs_agblock_t   agbno;  /* block number of btree block */
+
+               agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+               if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+                               cur->bc_private.i.agno, agbno, 0, &bp,
+                               XFS_INO_BTREE_REF))
+                       return error;
+               lev--;
+               xfs_btree_setbuf(cur, lev, bp);
+               block = XFS_BUF_TO_INOBT_BLOCK(bp);
+               if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+                       return error;
+               cur->bc_ptrs[lev] = 1;
+       }
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Insert the record held in the cursor (cur->bc_rec.i) at the point
+ * referenced by cur.
+ *
+ * The insert starts at the leaf (level 0).  Each call to
+ * xfs_inobt_insrec() may hand back a block number, a record and a new
+ * cursor; as long as the block number is not NULLAGBLOCK the returned
+ * record is inserted one level further up.  *stat is set to the result
+ * value of the last xfs_inobt_insrec() call.
+ * The cursor may be inconsistent on return if splits have been done.
+ */
+int                                    /* error */
+xfs_inobt_insert(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       int             *stat)          /* success/failure */
+{
+       xfs_btree_cur_t *newcur;        /* cursor handed back by insrec */
+       xfs_btree_cur_t *usecur;        /* cursor used for this level */
+       xfs_inobt_rec_t rec;            /* record being inserted this level */
+       xfs_agblock_t   splitbno;       /* block number handed back by insrec */
+       int             depth;          /* current level number in btree */
+       int             error;          /* error return value */
+       int             result;         /* insrec result, 0 for failure */
+
+       /*
+        * Build the record to insert from the cursor's in-core record.
+        */
+       INT_SET(rec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
+       INT_SET(rec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
+       INT_SET(rec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
+       usecur = cur;
+       newcur = (xfs_btree_cur_t *)0;
+       splitbno = NULLAGBLOCK;
+       depth = 0;
+       /*
+        * Walk up the tree from the leaf level, one level per iteration.
+        */
+       for (;;) {
+               /*
+                * Insert rec/splitbno into this level of the tree.
+                * Note if we fail, splitbno will be null.
+                */
+               error = xfs_inobt_insrec(usecur, depth, &splitbno, &rec,
+                               &newcur, &result);
+               depth++;
+               if (error) {
+                       /*
+                        * Only cursors created along the way are ours
+                        * to delete; the caller keeps its own.
+                        */
+                       if (usecur != cur)
+                               xfs_btree_del_cursor(usecur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               /*
+                * A cursor other than the caller's is finished with once
+                * a replacement cursor exists or no further level needs
+                * an insert.  Carry its level count back to the caller's
+                * cursor before deleting it.
+                */
+               if (usecur != cur && (newcur || splitbno == NULLAGBLOCK)) {
+                       cur->bc_nlevels = usecur->bc_nlevels;
+                       xfs_btree_del_cursor(usecur, XFS_BTREE_NOERROR);
+               }
+               /*
+                * Continue with the new cursor if we were given one.
+                */
+               if (newcur) {
+                       usecur = newcur;
+                       newcur = (xfs_btree_cur_t *)0;
+               }
+               /*
+                * No block handed back means this level absorbed the
+                * insert and we are done.
+                */
+               if (splitbno == NULLAGBLOCK)
+                       break;
+       }
+       *stat = result;
+       return 0;
+}
+
+/*
+ * Look up the record equal to ino in the btree given by cur.
+ * The search values [ino, fcnt, free] are stored in the cursor's record
+ * and the search itself is done by xfs_inobt_lookup() with XFS_LOOKUP_EQ.
+ */
+int                                    /* error */
+xfs_inobt_lookup_eq(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agino_t     ino,            /* starting inode of chunk */
+       __int32_t       fcnt,           /* free inode count */
+       xfs_inofree_t   free,           /* free inode mask */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* result of the lookup */
+
+       cur->bc_rec.i.ir_free = free;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_startino = ino;
+       error = xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
+       return error;
+}
+
+/*
+ * Look up the first record greater than or equal to ino
+ * in the btree given by cur.
+ * The search values [ino, fcnt, free] are stored in the cursor's record
+ * and the search itself is done by xfs_inobt_lookup() with XFS_LOOKUP_GE.
+ */
+int                                    /* error */
+xfs_inobt_lookup_ge(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agino_t     ino,            /* starting inode of chunk */
+       __int32_t       fcnt,           /* free inode count */
+       xfs_inofree_t   free,           /* free inode mask */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* result of the lookup */
+
+       cur->bc_rec.i.ir_free = free;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_startino = ino;
+       error = xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
+       return error;
+}
+
+/*
+ * Look up the first record less than or equal to ino
+ * in the btree given by cur.
+ * The search values [ino, fcnt, free] are stored in the cursor's record
+ * and the search itself is done by xfs_inobt_lookup() with XFS_LOOKUP_LE.
+ */
+int                                    /* error */
+xfs_inobt_lookup_le(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       xfs_agino_t     ino,            /* starting inode of chunk */
+       __int32_t       fcnt,           /* free inode count */
+       xfs_inofree_t   free,           /* free inode mask */
+       int             *stat)          /* success/failure */
+{
+       int             error;          /* result of the lookup */
+
+       cur->bc_rec.i.ir_free = free;
+       cur->bc_rec.i.ir_freecount = fcnt;
+       cur->bc_rec.i.ir_startino = ino;
+       error = xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
+       return error;
+}
+
+/*
+ * Update the record referred to by cur, to the value given
+ * by [ino, fcnt, free].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+int                                    /* error */
+xfs_inobt_update(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       __int32_t               fcnt,   /* free inode count */
+       xfs_inofree_t           free)   /* free inode mask */
+{
+       xfs_inobt_block_t       *block; /* btree block to update */
+       xfs_buf_t               *bp;    /* buffer containing btree block */
+       int                     error;  /* error return value */
+       int                     ptr;    /* current record number (updating) */
+       xfs_inobt_rec_t         *rp;    /* pointer to updated record */
+
+       /*
+        * Pick up the current block.
+        */
+       bp = cur->bc_bufs[0];
+       block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+       if (error = xfs_btree_check_sblock(cur, block, 0, bp))
+               return error;
+#endif
+       /*
+        * Get the address of the rec to be updated.
+        */
+       ptr = cur->bc_ptrs[0];
+       rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
+       /*
+        * Fill in the new contents and log them.
+        */
+       INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
+       INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
+       INT_SET(rp->ir_free, ARCH_CONVERT, free);
+       xfs_inobt_log_recs(cur, bp, ptr, ptr);
+       /*
+        * Updating first record in leaf. Pass new key value up to our parent.
+        */
+       if (ptr == 1) {
+               xfs_inobt_key_t key;    /* key containing [ino] */
+
+               INT_SET(key.ir_startino, ARCH_CONVERT, ino);
+               if (error = xfs_inobt_updkey(cur, &key, 1))
+                       return error;
+       }
+       return 0;
+}
diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c
new file mode 100644 (file)
index 0000000..36bf1bd
--- /dev/null
@@ -0,0 +1,1371 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+xfs_zone_t *xfs_ifork_zone;
+xfs_zone_t *xfs_inode_zone;
+
+#ifdef DEBUG
+/*
+ * Debug-only check of an inode buffer: walk the on-disk inodes in bp,
+ * one per sb_inodesize bytes, for as many inodes as fit in one inode
+ * cluster, and complain (then ASSERT) if any of them has a zero
+ * di_next_unlinked field.
+ */
+void
+xfs_inobp_check(
+       xfs_mount_t     *mp,
+       xfs_buf_t       *bp)
+{
+       xfs_dinode_t    *dip;
+       int             idx;
+       int             ninodes;
+
+       ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+
+       for (idx = 0; idx < ninodes; idx++) {
+               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+                                       idx * mp->m_sb.sb_inodesize);
+               if (!INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT))
+                       continue;
+               /*
+                * Report which buffer is bad before the ASSERT fires.
+                */
+               xfs_fs_cmn_err(CE_ALERT, mp,
+                       "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p.  About to pop an ASSERT.",
+                       bp);
+               ASSERT(!INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT));
+       }
+}
+#endif
+
+
+/*
+ * This routine is called to map an inode to the buffer containing
+ * the on-disk version of the inode.  It returns a pointer to the
+ * buffer containing the on-disk inode in the bpp parameter, and in
+ * the dipp parameter it returns a pointer to the on-disk inode within
+ * that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and
+ * dipp are undefined.
+ *
+ * If the inode is new and has not yet been initialized, use xfs_imap()
+ * to determine the size and location of the buffer to read from disk.
+ * If the inode has already been mapped to its buffer and read in once,
+ * then use the mapping information stored in the inode rather than
+ * calling xfs_imap().  This allows us to avoid the overhead of looking
+ * at the inode btree for small block file systems (see xfs_dilocate()).
+ * We can tell whether the inode has been mapped in before by comparing
+ * its disk block address to 0.  Only uninitialized inodes will have
+ * 0 for the disk block address.
+ *
+ * The bno parameter seeds imap.im_blkno before xfs_imap() is called for
+ * a not-yet-mapped inode; it must be either 0 or equal to the block
+ * address the inode ends up mapped to (this is ASSERTed).
+ *
+ * Returns 0 on success.  Errors from xfs_imap() and xfs_trans_read_buf()
+ * are passed back unchanged, EINVAL is returned when the mapping lies
+ * beyond the end of the data device, and (kernel builds only)
+ * EFSCORRUPTED is returned when an inode in the buffer fails the
+ * magic number/version check.
+ */
+int
+xfs_itobp(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,    
+       xfs_dinode_t    **dipp,
+       xfs_buf_t       **bpp,
+       xfs_daddr_t     bno)
+{
+       xfs_buf_t       *bp;
+       int             error;
+       xfs_imap_t      imap;
+#ifdef __KERNEL__
+       int             i;
+       int             ni;
+#endif
+
+       if (ip->i_blkno == (xfs_daddr_t)0) {
+               /*
+                * Call the space management code to find the location of the
+                * inode on disk.
+                */
+               imap.im_blkno = bno;
+               error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP);
+               if (error != 0) {
+                       return error;
+               }
+
+               /*
+                * If the inode number maps to a block outside the bounds
+                * of the file system then return EINVAL rather than calling
+                * read_buf and panicking when we get an error from the
+                * driver.
+                */
+               if ((imap.im_blkno + imap.im_len) >
+                   XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+                       return XFS_ERROR(EINVAL);
+               }
+
+               /*
+                * Fill in the fields in the inode that will be used to
+                * map the inode to its buffer from now on.
+                */
+               ip->i_blkno = imap.im_blkno;
+               ip->i_len = imap.im_len;
+               ip->i_boffset = imap.im_boffset;
+       } else {
+               /*
+                * We've already mapped the inode once, so just use the
+                * mapping that we saved the first time.
+                */
+               imap.im_blkno = ip->i_blkno;
+               imap.im_len = ip->i_len;
+               imap.im_boffset = ip->i_boffset;
+       }
+       ASSERT(bno == 0 || bno == imap.im_blkno);
+
+       /*
+        * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
+        * default to just a read_buf() call.
+        */
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
+                                  (int)imap.im_len, XFS_BUF_LOCK, &bp);
+
+       if (error) {
+               return error;
+       }
+#ifdef __KERNEL__
+       /*
+        * Validate the magic number and version of every inode in the buffer
+        * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+        * On failure the buffer is released before returning EFSCORRUPTED.
+        */
+#ifdef DEBUG
+       ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
+#else
+       ni = 1;
+#endif
+       for (i = 0; i < ni; i++) {
+               int             di_ok;
+               xfs_dinode_t    *dip;
+
+               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+                                       (i << mp->m_sb.sb_inodelog));
+               di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
+                           XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
+               if (XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
+                                XFS_RANDOM_ITOBP_INOTOBP)) {
+#ifdef DEBUG
+                       prdev("bad inode magic/vsn daddr 0x%Lx #%d (magic=%x)", 
+                               mp->m_dev, imap.im_blkno, i,
+                               INT_GET(dip->di_core.di_magic, ARCH_CONVERT));
+#endif
+                       xfs_trans_brelse(tp, bp);
+                       return XFS_ERROR(EFSCORRUPTED);
+               }
+       }
+#endif /* __KERNEL__ */
+
+       xfs_inobp_check(mp, bp);
+
+       /*
+        * Mark the buffer as an inode buffer now that it looks good
+        */
+       XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+       /*
+        * Set *dipp to point to the on-disk inode in the buffer.
+        */
+       *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Move inode type and inode format specific information from the
+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
+ * this means set if_rdev to the proper value.  For files, directories,
+ * and symlinks this means to bring in the in-line data or extent
+ * pointers.  For a file in B-tree format, only the root is immediately
+ * brought in-core.  The rest will be in-lined in if_extents when it
+ * is first referenced (see xfs_iread_extents()).
+ *
+ * When XFS_DFORK_Q_ARCH() is non-zero for the on-disk inode, an in-core
+ * attribute fork is allocated from xfs_ifork_zone and filled in the same
+ * way.  If that fails, the attribute fork is freed again and the data
+ * fork is torn down with xfs_idestroy_fork().
+ *
+ * Returns 0 on success, EFSCORRUPTED when the on-disk inode fails one of
+ * the sanity checks below, or the error from the per-format helper.
+ */
+STATIC int
+xfs_iformat(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip)
+{
+       xfs_attr_shortform_t    *atp;
+       int                     size;
+       int                     error;
+       xfs_fsize_t             di_size;
+
+       ip->i_df.if_ext_max =
+               XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+       error = 0;
+
+       /*
+        * The two forks together cannot have more extents than the
+        * inode has blocks.
+        */
+       if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + 
+                INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
+           INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)) {
+               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt dinode %Lu, extent total = %d, nblocks = %Ld.  Unmount and run xfs_repair.",
+                       ip->i_ino,
+                       (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
+                       INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT));
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       if (INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize) {
+               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt dinode %Lu, forkoff = 0x%x.  Unmount and run xfs_repair.",
+                       ip->i_ino, (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       /*
+        * Data fork: the in-core mode selects between device-type inodes
+        * and inodes that carry data in local, extents or btree format.
+        */
+       switch (ip->i_d.di_mode & IFMT) {
+       case IFIFO:
+       case IFCHR:
+       case IFBLK:
+       case IFSOCK:
+               if (INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)
+                       return XFS_ERROR(EFSCORRUPTED);
+               ip->i_d.di_size = 0;
+               ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
+               break;
+
+       case IFREG:
+       case IFLNK:
+       case IFDIR:
+               switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
+               case XFS_DINODE_FMT_LOCAL:
+                       /*
+                        * no local regular files yet
+                        */
+                       if ((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFREG) {
+                               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                       "corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+                                       ip->i_ino);
+                               return XFS_ERROR(EFSCORRUPTED);
+                       }
+
+                       /*
+                        * di_size is a signed 64-bit value read straight
+                        * from disk.  Reject negative sizes as well as
+                        * ones too large for the fork: a negative value
+                        * would pass the upper-bound test and then be
+                        * truncated to an int below.
+                        */
+                       di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+                       if (di_size < 0 ||
+                           di_size >
+                           XFS_DFORK_DSIZE_ARCH(dip, ip->i_mount, ARCH_CONVERT)) {
+                               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                                       "corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+                                       ip->i_ino, di_size);
+                               return XFS_ERROR(EFSCORRUPTED);
+                       }
+
+                       size = (int)di_size;
+                       error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
+                       break;
+               case XFS_DINODE_FMT_EXTENTS:
+                       error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+                       break;
+               case XFS_DINODE_FMT_BTREE:
+                       error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+                       break;
+               default:
+                       return XFS_ERROR(EFSCORRUPTED);
+               }
+               break;
+
+       default:
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       if (error) {
+               return error;
+       }
+       /*
+        * Nothing more to do unless the on-disk inode has an
+        * attribute fork.
+        */
+       if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+               return 0;
+       ASSERT(ip->i_afp == NULL);
+       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+       ip->i_afp->if_ext_max =
+               XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+       switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+       case XFS_DINODE_FMT_LOCAL:
+               /*
+                * The size of a local attribute fork comes from the
+                * shortform attribute header stored in the fork itself.
+                */
+               atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+               size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT);
+               error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+               break;
+       default:
+               error = XFS_ERROR(EFSCORRUPTED);
+               break;
+       }
+       if (error) {
+               /*
+                * Undo the attribute fork allocation and the data fork
+                * set up above.
+                */
+               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+               ip->i_afp = NULL;
+               xfs_idestroy_fork(ip, XFS_DATA_FORK);
+       }
+       return error;
+}
+
+/*
+ * The file is in-lined in the on-disk inode.
+ * If it fits into if_inline_data, then copy
+ * it there, otherwise allocate a buffer for it
+ * and copy the data there.  Either way, set
+ * if_data to point at the data.
+ * If we allocate a buffer for the data, make
+ * sure that its size is a multiple of 4 and
+ * record the real size in if_real_bytes.
+ *
+ * size is the number of bytes of local data in the fork selected by
+ * whichfork.  A negative size, or one larger than the on-disk fork,
+ * is reported and rejected with EFSCORRUPTED; otherwise 0 is returned.
+ */
+STATIC int
+xfs_iformat_local(
+       xfs_inode_t     *ip,
+       xfs_dinode_t    *dip,
+       int             whichfork,
+       int             size)
+{
+       xfs_ifork_t     *ifp;
+       int             real_size;
+
+       /*
+        * If the size is unreasonable, then something
+        * is wrong and we just bail out rather than crash in
+        * kmem_alloc() or bcopy() below.  A negative size must be
+        * caught here too: it would fail the unsigned comparison
+        * against sizeof(if_inline_data) and be used as an
+        * allocation and copy length.
+        */
+       if (size < 0 ||
+           size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
+               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+                       ip->i_ino, size,
+                       XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT));
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       /*
+        * real_size stays 0 unless a separate buffer is allocated.
+        */
+       real_size = 0;
+       if (size == 0)
+               ifp->if_u1.if_data = NULL;
+       else if (size <= sizeof(ifp->if_u2.if_inline_data))
+               ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+       else {
+               real_size = roundup(size, 4);
+               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+       }
+       ifp->if_bytes = size;
+       ifp->if_real_bytes = real_size;
+       if (size)
+               bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_data, size);
+       ifp->if_flags &= ~XFS_IFEXTENTS;
+       ifp->if_flags |= XFS_IFINLINE;
+       return 0;
+}
+
+/*
+ * The fork is in extents format: every extent record fits inside the
+ * on-disk inode.  Copy the records in-core, using the fork's
+ * if_inline_ext array when there are at most XFS_INLINE_EXTS of them
+ * and a kmem_alloc()ed buffer otherwise, and point if_extents at the
+ * copy.  if_bytes is set to the size of the records and if_real_bytes
+ * to the size of the allocated buffer (0 when none was allocated).
+ *
+ * Returns 0 on success or EFSCORRUPTED when the extent count is
+ * unreasonable or xfs_check_nostate_extents() rejects the records.
+ */
+STATIC int
+xfs_iformat_extents(
+       xfs_inode_t     *ip,
+       xfs_dinode_t    *dip,
+       int             whichfork)
+{
+       xfs_ifork_t     *fork;          /* in-core fork being filled in */
+       int             alloc_bytes;    /* bytes allocated for the list */
+       int             list_bytes;     /* bytes taken by the records */
+       int             nextents;       /* number of on-disk extents */
+
+       fork = XFS_IFORK_PTR(ip, whichfork);
+       nextents = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+       list_bytes = nextents * (uint)sizeof(xfs_bmbt_rec_t);
+
+       /*
+        * Bail out on a nonsensical extent count before it can be
+        * used as an allocation or copy length.
+        */
+       if (list_bytes < 0 || list_bytes > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) {
+               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+                       ip->i_ino, nextents);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       /*
+        * Pick the storage for the in-core extent list.
+        */
+       alloc_bytes = 0;
+       if (nextents == 0) {
+               fork->if_u1.if_extents = NULL;
+       } else if (nextents <= XFS_INLINE_EXTS) {
+               fork->if_u1.if_extents = fork->if_u2.if_inline_ext;
+       } else {
+               fork->if_u1.if_extents = kmem_alloc(list_bytes, KM_SLEEP);
+               ASSERT(fork->if_u1.if_extents != NULL);
+               alloc_bytes = list_bytes;
+       }
+       fork->if_real_bytes = alloc_bytes;
+       fork->if_bytes = list_bytes;
+       if (list_bytes != 0) {
+               xfs_validate_extents(
+                       (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT),
+                       nextents, XFS_EXTFMT_INODE(ip));
+               bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), fork->if_u1.if_extents,
+                     list_bytes);
+               xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nextents,
+                       whichfork);
+               /*
+                * The nostate check applies to every attribute fork,
+                * and to a data fork only when the inode's extent
+                * format is XFS_EXTFMT_NOSTATE.
+                * NOTE(review): this error return does not release a
+                * buffer obtained from kmem_alloc() above -- confirm
+                * whether a caller is expected to clean it up.
+                */
+               if ((whichfork != XFS_DATA_FORK ||
+                    XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) &&
+                   xfs_check_nostate_extents(fork->if_u1.if_extents,
+                                             nextents)) {
+                       return XFS_ERROR(EFSCORRUPTED);
+               }
+       }
+       fork->if_flags |= XFS_IFEXTENTS;
+       return 0;
+}
+
+/*
+ * The fork is in B-tree format.  Allocate an in-core buffer for the
+ * root of the B-tree and convert the on-disk root into it with
+ * xfs_bmdr_to_bmbt().  The extent list itself is not read here:
+ * XFS_IFEXTENTS is cleared and XFS_IFBROOT is set in the fork flags.
+ *
+ * Returns 0 on success or EFSCORRUPTED when the fork fails the sanity
+ * checks below.
+ */
+STATIC int
+xfs_iformat_btree(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip,
+       int                     whichfork)
+{
+       xfs_bmdr_block_t        *droot;         /* on-disk btree root */
+       xfs_ifork_t             *fork;          /* in-core fork */
+       /* REFERENCED */
+       int                     numrecs;        /* records in the root */
+       int                     rootbytes;      /* in-core root size */
+
+       fork = XFS_IFORK_PTR(ip, whichfork);
+       droot = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+       numrecs = XFS_BMAP_BROOT_NUMRECS(droot);
+       rootbytes = XFS_BMAP_BROOT_SPACE(droot);
+
+       /*
+        * Reject the fork if any of these holds:
+        *  - it has no more extents than fit in the fork, so it should
+        *    not be in btree format at all;
+        *  - the root block has more records than fit into the fork;
+        *  - it claims more extents than the inode has blocks.
+        */
+       if (XFS_IFORK_NEXTENTS(ip, whichfork) <= fork->if_ext_max ||
+           XFS_BMDR_SPACE_CALC(numrecs) >
+                       XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT) ||
+           XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) {
+               xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+                       "corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+                       ip->i_ino);
+               return XFS_ERROR(EFSCORRUPTED);
+       }
+
+       fork->if_broot = kmem_alloc(rootbytes, KM_SLEEP);
+       fork->if_broot_bytes = rootbytes;
+       ASSERT(fork->if_broot != NULL);
+       /*
+        * Convert the on-disk root into the in-memory root layout.
+        */
+       xfs_bmdr_to_bmbt(droot, XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT),
+               fork->if_broot, rootbytes);
+       fork->if_flags &= ~XFS_IFEXTENTS;
+       fork->if_flags |= XFS_IFBROOT;
+
+       return 0;
+}
+
+/*
+ * xfs_xlate_dinode_core - translate an xfs_dinode_core_t between its
+ * on-disk and native representations
+ *
+ * buf  = on-disk representation
+ * dip  = native representation
+ * dir  = direction, must be non-zero:
+ *            > 0 -> disk to native
+ *            < 0 -> native to disk
+ * arch = on-disk architecture; for ARCH_NOCONVERT the whole structure
+ *        is copied as is, otherwise each field goes through INT_XLATE
+ *        and the di_pad bytes are copied unchanged
+ */
+void 
+xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip, 
+    int dir, xfs_arch_t arch)
+{
+    xfs_dinode_core_t   *disk;  /* on-disk image (buf) */
+    xfs_dinode_core_t   *core;  /* native image (dip) */
+
+    ASSERT(dir);
+
+    disk = (xfs_dinode_core_t *)buf;
+    core = dip;
+
+    /*
+     * No conversion wanted: a straight copy in the requested direction.
+     */
+    if (arch == ARCH_NOCONVERT) {
+        if (dir > 0)
+            bcopy((xfs_caddr_t)disk, (xfs_caddr_t)core, sizeof(xfs_dinode_core_t));
+        else
+            bcopy((xfs_caddr_t)core, (xfs_caddr_t)disk, sizeof(xfs_dinode_core_t));
+        return;
+    }
+
+    /* identification, type and ownership */
+    INT_XLATE(disk->di_magic, core->di_magic, dir, arch);
+    INT_XLATE(disk->di_mode, core->di_mode, dir, arch);
+    INT_XLATE(disk->di_version, core->di_version, dir, arch);
+    INT_XLATE(disk->di_format, core->di_format, dir, arch);
+    INT_XLATE(disk->di_onlink, core->di_onlink, dir, arch);
+    INT_XLATE(disk->di_uid, core->di_uid, dir, arch);
+    INT_XLATE(disk->di_gid, core->di_gid, dir, arch);
+    INT_XLATE(disk->di_nlink, core->di_nlink, dir, arch);
+    INT_XLATE(disk->di_projid, core->di_projid, dir, arch);
+
+    /* the padding is carried across byte for byte */
+    if (dir > 0)
+        bcopy(disk->di_pad, core->di_pad, sizeof(disk->di_pad));
+    else
+        bcopy(core->di_pad, disk->di_pad, sizeof(disk->di_pad));
+
+    /* timestamps */
+    INT_XLATE(disk->di_atime.t_sec, core->di_atime.t_sec, dir, arch);
+    INT_XLATE(disk->di_atime.t_nsec, core->di_atime.t_nsec, dir, arch);
+    INT_XLATE(disk->di_mtime.t_sec, core->di_mtime.t_sec, dir, arch);
+    INT_XLATE(disk->di_mtime.t_nsec, core->di_mtime.t_nsec, dir, arch);
+    INT_XLATE(disk->di_ctime.t_sec, core->di_ctime.t_sec, dir, arch);
+    INT_XLATE(disk->di_ctime.t_nsec, core->di_ctime.t_nsec, dir, arch);
+
+    /* sizes */
+    INT_XLATE(disk->di_size, core->di_size, dir, arch);
+    INT_XLATE(disk->di_nblocks, core->di_nblocks, dir, arch);
+    INT_XLATE(disk->di_extsize, core->di_extsize, dir, arch);
+
+    /* fork layout, DMAPI fields, flags and generation */
+    INT_XLATE(disk->di_nextents, core->di_nextents, dir, arch);
+    INT_XLATE(disk->di_anextents, core->di_anextents, dir, arch);
+    INT_XLATE(disk->di_forkoff, core->di_forkoff, dir, arch);
+    INT_XLATE(disk->di_aformat, core->di_aformat, dir, arch);
+    INT_XLATE(disk->di_dmevmask, core->di_dmevmask, dir, arch);
+    INT_XLATE(disk->di_dmstate, core->di_dmstate, dir, arch);
+    INT_XLATE(disk->di_flags, core->di_flags, dir, arch);
+    INT_XLATE(disk->di_gen, core->di_gen, dir, arch);
+}
+
+/*
+ * Given a mount structure and an inode number, return a pointer
+ * to a newly allocated in-core inode coresponding to the given
+ * inode number.
+ * 
+ * Initialize the inode's attributes and extent pointers if it
+ * already has them (it will not if the inode has no links).
+ */
+int
+xfs_iread(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_ino_t       ino,
+       xfs_inode_t     **ipp,
+       xfs_daddr_t             bno)
+{
+       xfs_buf_t       *bp;
+       xfs_dinode_t    *dip;
+       xfs_inode_t     *ip;
+       int             error;
+
+       ASSERT(xfs_inode_zone != NULL);
+
+       ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
+       ip->i_ino = ino;
+       ip->i_dev = mp->m_dev;
+       ip->i_mount = mp;
+
+       /*
+        * Get pointer's to the on-disk inode and the buffer containing it.
+        * If the inode number refers to a block outside the file system
+        * then xfs_itobp() will return NULL.  In this case we should
+        * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
+        * know that this is a new incore inode.
+        */
+       error = xfs_itobp(mp, tp, ip, &dip, &bp, bno);
+
+       if (error != 0) {
+               kmem_zone_free(xfs_inode_zone, ip);
+               return error;
+       }
+
+       /*
+        * Initialize inode's trace buffers.
+        * Do this before xfs_iformat in case it adds entries.
+        */
+#ifdef XFS_BMAP_TRACE
+       ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_BMBT_TRACE
+       ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_RW_TRACE
+       ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_STRAT_TRACE
+       ip->i_strat_trace = ktrace_alloc(XFS_STRAT_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_ILOCK_TRACE
+       ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
+#endif
+#ifdef XFS_DIR2_TRACE
+       ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
+#endif
+
+       /*
+        * If we got something that isn't an inode it means someone
+        * (nfs or dmi) has a stale handle.
+        */
+        if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
+               kmem_zone_free(xfs_inode_zone, ip);
+               xfs_trans_brelse(tp, bp);
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * If the on-disk inode is already linked to a directory
+        * entry, copy all of the inode into the in-core inode.
+        * xfs_iformat() handles copying in the inode format
+        * specific information.
+        * Otherwise, just get the truly permanent information.
+        */
+       if (!INT_ISZERO(dip->di_core.di_mode, ARCH_CONVERT)) {
+                xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 
+                     &(ip->i_d), 1, ARCH_CONVERT);
+               error = xfs_iformat(ip, dip);
+               if (error)  {
+                       kmem_zone_free(xfs_inode_zone, ip);
+                       xfs_trans_brelse(tp, bp);
+                       return error;
+               }
+       } else {
+               ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT);
+               ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT);
+               ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT);
+               /*
+                * Make sure to pull in the mode here as well in
+                * case the inode is released without being used.
+                * This ensures that xfs_inactive() will see that
+                * the inode is already free and not try to mess
+                * with the uninitialized part of it.
+                */
+               ip->i_d.di_mode = 0;
+               /*
+                * Initialize the per-fork minima and maxima for a new
+                * inode here.  xfs_iformat will do it for old inodes.
+                */
+               ip->i_df.if_ext_max =
+                       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
+       }       
+
+       /*
+        * The inode format changed when we moved the link count and
+        * made it 32 bits long.  If this is an old format inode,
+        * convert it in memory to look like a new one.  If it gets
+        * flushed to disk we will convert back before flushing or
+        * logging it.  We zero out the new projid field and the old link
+        * count field.  We'll handle clearing the pad field (the remains
+        * of the old uuid field) when we actually convert the inode to
+        * the new format. We don't change the version number so that we
+        * can distinguish this from a real new format inode.
+        */
+       if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+               ip->i_d.di_nlink = ip->i_d.di_onlink;
+               ip->i_d.di_onlink = 0;
+               ip->i_d.di_projid = 0;
+       }
+
+       ip->i_delayed_blks = 0;
+
+       /*
+        * Mark the buffer containing the inode as something to keep
+        * around for a while.  This helps to keep recently accessed
+        * meta-data in-core longer.
+        */
+        XFS_BUF_SET_REF(bp, XFS_INO_REF);
+
+       /*
+        * Use xfs_trans_brelse() to release the buffer containing the
+        * on-disk inode, because it was acquired with xfs_trans_read_buf()
+        * in xfs_itobp() above.  If tp is NULL, this is just a normal
+        * brelse().  If we're within a transaction, then xfs_trans_brelse()
+        * will only release the buffer if it is not dirty within the
+        * transaction.  It will be OK to release the buffer in this case,
+        * because inodes on disk are never destroyed and we will be
+        * locking the new in-core inode before putting it in the hash
+        * table where other processes can find it.  Thus we don't have
+        * to worry about the inode being changed just because we released
+        * the buffer.
+        */
+       xfs_trans_brelse(tp, bp);
+       *ipp = ip;
+       return 0;
+}
+
+/*
+ * Read in extents from a btree-format inode.
+ * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
+ *
+ * tp -- transaction handed on to xfs_bmap_read_extents()
+ * ip -- the inode whose extents are being read in
+ * whichfork -- selects, via XFS_IFORK_PTR, the fork to read extents for
+ *
+ * Returns EFSCORRUPTED if the fork is not in btree format, the error
+ * from xfs_bmap_read_extents() if that call fails, and 0 otherwise.
+ * On a read failure the extent array is freed again and if_bytes,
+ * if_real_bytes and the XFS_IFEXTENTS flag are reset.
+ */
+int
+xfs_iread_extents(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       int             whichfork)
+{
+       int             error;
+       xfs_ifork_t     *ifp;
+       size_t          size;
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               return XFS_ERROR(EFSCORRUPTED);
+       size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       /*
+        * We know that the size is legal (it's checked in iformat_btree).
+        * The array is sized for the fork's full extent count, and
+        * XFS_IFEXTENTS is set before xfs_bmap_read_extents() runs; both
+        * are undone below if the read fails.
+        */
+       ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP);
+       ASSERT(ifp->if_u1.if_extents != NULL);
+       ifp->if_lastex = NULLEXTNUM;
+       ifp->if_bytes = ifp->if_real_bytes = (int)size;
+       ifp->if_flags |= XFS_IFEXTENTS;
+       error = xfs_bmap_read_extents(tp, ip, whichfork);
+       if (error) {
+               kmem_free(ifp->if_u1.if_extents, size);
+               ifp->if_u1.if_extents = NULL;
+               ifp->if_bytes = ifp->if_real_bytes = 0;
+               ifp->if_flags &= ~XFS_IFEXTENTS;
+               return error;
+       }
+       xfs_validate_extents((xfs_bmbt_rec_32_t *)ifp->if_u1.if_extents,
+               XFS_IFORK_NEXTENTS(ip, whichfork), XFS_EXTFMT_INODE(ip));
+       return 0;
+}
+
+/*
+ * Reallocate the space for if_broot based on the number of records
+ * being added or deleted as indicated in rec_diff.  Move the records
+ * and pointers in if_broot to fit the new size.  When shrinking this
+ * will eliminate holes between the records and pointers created by
+ * the caller.  When growing this will create holes to be filled in
+ * by the caller.
+ *
+ * The caller must not request to add more records than would fit in
+ * the on-disk inode root.  If the if_broot is currently NULL, then
+ * if we are adding records one will be allocated.  The caller must
+ * also not request that the number of records go below zero, although
+ * it can go to zero.
+ *
+ * ip -- the inode whose if_broot area is changing
+ * rec_diff -- the change in the number of records, positive or negative,
+ *      requested for the if_broot array.  Zero is a no-op.
+ * whichfork -- selects, via XFS_IFORK_PTR, the fork whose root changes
+ */
+void
+xfs_iroot_realloc(
+       xfs_inode_t             *ip,
+       int                     rec_diff,
+       int                     whichfork)
+{
+       int                     cur_max;
+       xfs_ifork_t             *ifp;
+       xfs_bmbt_block_t        *new_broot;
+       int                     new_max;
+       size_t                  new_size;
+       char                    *np;
+       char                    *op;
+
+       /*
+        * Handle the degenerate case quietly.
+        */
+       if (rec_diff == 0) {
+               return;
+       }
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (rec_diff > 0) {
+               /*
+                * If there wasn't any memory allocated before, just
+                * allocate it now and get out.  Nothing is written into
+                * the new buffer here, and this routine does not set
+                * XFS_IFBROOT.
+                */
+               if (ifp->if_broot_bytes == 0) {
+                       new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
+                       ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size,
+                                                                    KM_SLEEP);
+                       ifp->if_broot_bytes = (int)new_size;
+                       return;
+               }
+
+               /*
+                * If there is already an existing if_broot, then we need
+                * to realloc() it and shift the pointers to their new
+                * location.  The records don't change location because
+                * they are kept butted up against the btree block header.
+                *
+                * op is where the pointers sit for the old buffer size and
+                * np where they belong for the new size; both lie in the
+                * same (reallocated) buffer, hence ovbcopy(), the
+                * overlap-tolerant copy.  Only the cur_max pointers that
+                * existed before are moved.
+                */
+               cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+               new_max = cur_max + rec_diff;
+               new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+               ifp->if_broot = (xfs_bmbt_block_t *) 
+                 kmem_realloc(ifp->if_broot,
+                               new_size,
+                               (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
+                               KM_SLEEP);
+               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+                                                     ifp->if_broot_bytes);
+               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+                                                     (int)new_size);
+               ifp->if_broot_bytes = (int)new_size;
+               ASSERT(ifp->if_broot_bytes <=
+                       XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+               ovbcopy(op, np, cur_max * (uint)sizeof(xfs_dfsbno_t));
+               return;
+       }
+
+       /*
+        * rec_diff is less than 0.  In this case, we are shrinking the
+        * if_broot buffer.  It must already exist.  If we go to zero
+        * records, just get rid of the root and clear the status bit.
+        *
+        * Shrinking never reallocates in place: a fresh, smaller buffer
+        * is allocated (or none at all for zero records), the surviving
+        * contents are copied over, and the old buffer is freed.
+        */
+       ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
+       cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+       new_max = cur_max + rec_diff;
+       ASSERT(new_max >= 0);
+       if (new_max > 0)
+               new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
+       else
+               new_size = 0;
+       if (new_size > 0) {
+               new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP);
+               /*
+                * First copy over the btree block header.
+                */
+               bcopy(ifp->if_broot, new_broot, sizeof(xfs_bmbt_block_t));
+       } else {
+               new_broot = NULL;
+               ifp->if_flags &= ~XFS_IFBROOT;
+       }
+
+       /*
+        * Only copy the records and pointers if there are any.
+        */
+       if (new_max > 0) {
+               /*
+                * First copy the records.  Only the first new_max of
+                * them are kept.
+                */
+               op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1,
+                                                    ifp->if_broot_bytes);
+               np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
+                                                    (int)new_size);
+               bcopy(op, np, new_max * (uint)sizeof(xfs_bmbt_rec_t));  
+
+               /*
+                * Then copy the pointers.  Their address depends on the
+                * buffer size, so source and destination are computed
+                * with the old and the new size respectively.
+                */
+               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+                                                    ifp->if_broot_bytes);
+               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
+                                                    (int)new_size);
+               bcopy(op, np, new_max * (uint)sizeof(xfs_dfsbno_t));
+       }
+       kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+       ifp->if_broot = new_broot;
+       ifp->if_broot_bytes = (int)new_size;
+       ASSERT(ifp->if_broot_bytes <=
+               XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
+       return;
+}
+
+/*
+ * This is called when the amount of space needed for if_extents
+ * is increased or decreased.  The change in size is indicated by
+ * the number of extents that need to be added or deleted in the
+ * ext_diff parameter.
+ *
+ * If the amount of space needed has decreased below the size of the
+ * inline buffer, then switch to using the inline buffer.  Otherwise,
+ * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * to what is needed.
+ *
+ * ip -- the inode whose if_extents area is changing
+ * ext_diff -- the change in the number of extents, positive or negative,
+ *      requested for the if_extents array.  Zero is a no-op.
+ * whichfork -- selects, via XFS_IFORK_PTR, the fork being resized
+ *
+ * On return if_bytes holds the new number of bytes in use and
+ * if_real_bytes the size of the heap allocation, which is 0 whenever
+ * the extents are absent or live in the inline buffer.
+ */
+void
+xfs_iext_realloc(
+       xfs_inode_t     *ip,
+       int             ext_diff,
+       int             whichfork)
+{
+       int             byte_diff;
+       xfs_ifork_t     *ifp;
+       int             new_size;
+       uint            rnew_size;
+
+       if (ext_diff == 0) {
+               return;
+       }
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t);
+       new_size = (int)ifp->if_bytes + byte_diff;
+       ASSERT(new_size >= 0);
+
+       if (new_size == 0) {
+               if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
+                       ASSERT(ifp->if_real_bytes != 0);
+                       kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+               }
+               ifp->if_u1.if_extents = NULL;
+               rnew_size = 0;
+       } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) {
+               /*
+                * If the valid extents can fit in if_inline_ext,
+                * copy them from the malloc'd vector and free it.
+                * Only the first new_size bytes are carried over.
+                */
+               if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) {
+                       /*
+                        * For now, empty files are format EXTENTS,
+                        * so the if_extents pointer is null.
+                        */
+                       if (ifp->if_u1.if_extents) {
+                               bcopy(ifp->if_u1.if_extents,
+                                     ifp->if_u2.if_inline_ext, new_size);
+                               kmem_free(ifp->if_u1.if_extents,
+                                         ifp->if_real_bytes);
+                       }
+                       ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+               }
+               rnew_size = 0;
+       } else {
+               rnew_size = new_size;
+               if ((rnew_size & (rnew_size - 1)) != 0)
+                       rnew_size = xfs_iroundup(rnew_size);
+               /*
+                * Stuck with malloc/realloc.
+                * The allocation size was rounded up to a power of two
+                * just above.  When moving off the inline buffer the
+                * whole inline array is copied into the new allocation;
+                * otherwise the existing allocation is resized, but only
+                * if the rounded size actually changed.
+                */
+               if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) {
+                       ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+                               kmem_alloc(rnew_size, KM_SLEEP);
+                       bcopy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+                             sizeof(ifp->if_u2.if_inline_ext));
+               } else if (rnew_size != ifp->if_real_bytes) {
+                       ifp->if_u1.if_extents = (xfs_bmbt_rec_t *)
+                         kmem_realloc(ifp->if_u1.if_extents,
+                                       rnew_size,
+                                       ifp->if_real_bytes,
+                                       KM_SLEEP);
+               }
+       }
+       ifp->if_real_bytes = rnew_size;
+       ifp->if_bytes = new_size;
+}
+
+
+/*
+ * This is called when the amount of space needed for if_data
+ * is increased or decreased.  The change in size is indicated by
+ * the number of bytes that need to be added or deleted in the
+ * byte_diff parameter.
+ *
+ * If the amount of space needed has decreased below the size of the
+ * inline buffer, then switch to using the inline buffer.  Otherwise,
+ * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * to what is needed.
+ *
+ * ip -- the inode whose if_data area is changing
+ * byte_diff -- the change in the number of bytes, positive or negative,
+ *      requested for the if_data array.  Zero is a no-op.
+ * whichfork -- selects, via XFS_IFORK_PTR, the fork being resized
+ *
+ * On return if_bytes holds the new number of bytes in use and
+ * if_real_bytes the size of the heap allocation, which is 0 whenever
+ * the data is absent or lives in the inline buffer.
+ */
+void
+xfs_idata_realloc(
+       xfs_inode_t     *ip,
+       int             byte_diff,
+       int             whichfork)
+{
+       xfs_ifork_t     *ifp;
+       int             new_size;
+       int             real_size;
+
+       if (byte_diff == 0) {
+               return;
+       }
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       new_size = (int)ifp->if_bytes + byte_diff;
+       ASSERT(new_size >= 0);
+
+       if (new_size == 0) {
+               if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+               }
+               ifp->if_u1.if_data = NULL;
+               real_size = 0;
+       } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
+               /*
+                * If the valid extents/data can fit in if_inline_ext/data,
+                * copy them from the malloc'd vector and free it.
+                * With no buffer at all yet, simply point at the inline
+                * buffer; if the inline buffer is already in use there
+                * is nothing to do.
+                */
+               if (ifp->if_u1.if_data == NULL) {
+                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       ASSERT(ifp->if_real_bytes != 0);
+                       bcopy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+                             new_size);
+                       kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+               }
+               real_size = 0;
+       } else {
+               /*
+                * Stuck with malloc/realloc.
+                * For inline data, the underlying buffer must be
+                * a multiple of 4 bytes in size so that it can be
+                * logged and stay on word boundaries.  We enforce
+                * that here.
+                *
+                * Three cases follow: no buffer yet (plain allocation),
+                * an existing heap buffer (resize), or data currently in
+                * the inline buffer (allocate and copy the if_bytes bytes
+                * that were in use).
+                */
+               real_size = roundup(new_size, 4);
+               if (ifp->if_u1.if_data == NULL) {
+                       ASSERT(ifp->if_real_bytes == 0);
+                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       /*
+                        * Only do the realloc if the underlying size
+                        * is really changing.
+                        */
+                       if (ifp->if_real_bytes != real_size) {
+                               ifp->if_u1.if_data =
+                                       kmem_realloc(ifp->if_u1.if_data,
+                                                       real_size,
+                                                       ifp->if_real_bytes,
+                                                       KM_SLEEP);
+                       }
+               } else {
+                       ASSERT(ifp->if_real_bytes == 0);
+                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
+                       bcopy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
+                             ifp->if_bytes);
+               }
+       }
+       ifp->if_real_bytes = real_size;
+       ifp->if_bytes = new_size;
+       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+}
+
+
+/*
+ * Map inode to disk block and offset.
+ *
+ * mp -- the mount point structure for the current file system
+ * tp -- the current transaction
+ * ino -- the inode number of the inode to be located
+ * imap -- this structure is filled in with the information necessary
+ *      to retrieve the given inode from disk.  On entry, a non-zero
+ *      im_blkno is converted to a file system block and passed to
+ *      xfs_dilocate() (presumably as an already-known location --
+ *      TODO confirm); a zero im_blkno passes NULLFSBLOCK instead.
+ * flags -- flags to pass to xfs_dilocate indicating whether or not
+ *      lookups in the inode btree were OK or not
+ *
+ * Returns 0 on success.  If xfs_dilocate() fails its error is returned
+ * and imap is left unmodified.
+ */
+int
+xfs_imap(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_ino_t       ino,
+       xfs_imap_t      *imap,
+       uint            flags)
+{
+       xfs_fsblock_t   fsbno;
+       int             len;
+       int             off;
+       int             error;
+
+       fsbno = imap->im_blkno ?
+               XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
+       error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
+       if (error != 0) {
+               return error;
+       }
+       imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
+       imap->im_len = XFS_FSB_TO_BB(mp, len);
+       imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
+       imap->im_ioffset = (ushort)off;
+       imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
+       return 0;
+}
+
+void
+xfs_idestroy_fork(
+       xfs_inode_t     *ip,
+       int             whichfork)
+{
+       xfs_ifork_t     *ifp;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (ifp->if_broot != NULL) {
+               kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+               ifp->if_broot = NULL;
+       }
+
+       /*
+        * Overall, this routine releases the memory hanging off one
+        * fork of the inode: the btree root (above), the heap-allocated
+        * data or extents buffer (here) and, for the attribute fork
+        * only, the fork structure itself (at the end).
+        *
+        * If the format is local, then we can't have an extents
+        * array so just look for an inline data array.  If we're
+        * not local then we may or may not have an extents list,
+        * so check and free it up if we do.
+        *
+        * Buffers that point at the fork's own inline storage are never
+        * passed to kmem_free().
+        *
+        * NOTE(review): if_broot_bytes is not reset when if_broot is
+        * freed above -- confirm no caller looks at it afterwards.
+        */
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 
+                   (ifp->if_u1.if_data != NULL)) {
+                       ASSERT(ifp->if_real_bytes != 0);
+                       kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+                       ifp->if_u1.if_data = NULL;
+                       ifp->if_real_bytes = 0;
+               }
+       } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
+                  (ifp->if_u1.if_extents != NULL) &&
+                  (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) {
+               ASSERT(ifp->if_real_bytes != 0);
+               kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+               ifp->if_u1.if_extents = NULL;
+               ifp->if_real_bytes = 0;
+       }
+       ASSERT(ifp->if_u1.if_extents == NULL ||
+              ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
+       ASSERT(ifp->if_real_bytes == 0);
+       if (whichfork == XFS_ATTR_FORK) {
+               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+               ip->i_afp = NULL;
+       }
+}
+
+/*
+ * xfs_iroundup: round up argument to next power of two
+ *
+ * v -- the value to round; bit 31 must be clear (asserted)
+ *
+ * Returns v unchanged if (v & (v - 1)) == 0, i.e. v is zero or already
+ * a power of two.  Otherwise zero bits of v are set one at a time,
+ * starting from bit 0, until v consists solely of consecutive low-order
+ * one bits, and v + 1 is returned.  The trailing ASSERT(0)/return 0 is
+ * only reached if the loop runs out without finding such a value.
+ */
+uint
+xfs_iroundup(
+       uint    v)
+{
+       int i;
+       uint m;
+
+       if ((v & (v - 1)) == 0)
+               return v;
+       ASSERT((v & 0x80000000) == 0);
+       if ((v & (v + 1)) == 0)
+               return v + 1;
+       for (i = 0, m = 1; i < 31; i++, m <<= 1) {
+               if (v & m)
+                       continue;
+               v |= m;
+               if ((v & (v + 1)) == 0)
+                       return v + 1;
+       }
+       ASSERT(0);
+       return( 0 );
+}
+
+/*
+ * xfs_iextents_copy()
+ *
+ * This is called to copy the REAL extents (as opposed to the delayed
+ * allocation extents) from the inode into the given buffer.  It
+ * returns the number of bytes copied into the buffer.
+ *
+ * If there are no delayed allocation extents, then we can just
+ * bcopy() the extents into the buffer.  Otherwise, we need to
+ * examine each extent in turn and skip those which are delayed.
+ *
+ * ip -- the inode whose extents are copied; its i_lock must be held
+ *      (asserted via ismrlocked)
+ * buffer -- destination for the extent records; this routine does not
+ *      know the buffer's size, so the caller must provide enough room
+ * whichfork -- selects, via XFS_IFORK_PTR, the fork to copy from; the
+ *      extent-by-extent path is only expected for the data fork
+ *      (asserted)
+ *
+ * The fork must hold at least one extent record (asserted).
+ */
+int
+xfs_iextents_copy(
+       xfs_inode_t             *ip,
+       xfs_bmbt_rec_32_t       *buffer,
+       int                     whichfork)
+{
+       int                     copied;
+       xfs_bmbt_rec_32_t       *dest_ep;
+       xfs_bmbt_rec_t          *ep;
+#ifdef DEBUG
+       xfs_exntfmt_t           fmt = XFS_EXTFMT_INODE(ip);
+#endif
+#ifdef XFS_BMAP_TRACE
+       static char             fname[] = "xfs_iextents_copy";
+#endif
+       int                     i;
+       xfs_ifork_t             *ifp;
+       int                     nrecs;
+       xfs_fsblock_t           start_block;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+       ASSERT(ifp->if_bytes > 0);
+
+       nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork);
+       ASSERT(nrecs > 0);
+       if (nrecs == XFS_IFORK_NEXTENTS(ip, whichfork)) {
+               /*
+                * There are no delayed allocation extents,
+                * so just copy everything.  (The in-core record count
+                * matches the fork's extent count.)
+                */
+               ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+               ASSERT(ifp->if_bytes ==
+                      (XFS_IFORK_NEXTENTS(ip, whichfork) *
+                       (uint)sizeof(xfs_bmbt_rec_t)));
+               bcopy(ifp->if_u1.if_extents, buffer, ifp->if_bytes);
+               xfs_validate_extents(buffer, nrecs, fmt);
+               return ifp->if_bytes;
+       }
+
+       ASSERT(whichfork == XFS_DATA_FORK);
+       /*
+        * There are some delayed allocation extents in the
+        * inode, so copy the extents one at a time and skip
+        * the delayed ones.  There must be at least one
+        * non-delayed extent (asserted after the loop).
+        */
+       ASSERT(nrecs > ip->i_d.di_nextents);
+       ep = ifp->if_u1.if_extents;
+       dest_ep = buffer;
+       copied = 0;
+       for (i = 0; i < nrecs; i++) {
+               start_block = xfs_bmbt_get_startblock(ep);
+               if (ISNULLSTARTBLOCK(start_block)) {
+                       /*
+                        * It's a delayed allocation extent, so skip it.
+                        */
+                       ep++;
+                       continue;
+               }
+
+               *dest_ep = *(xfs_bmbt_rec_32_t *)ep;
+               dest_ep++;
+               ep++;
+               copied++;
+       }
+       ASSERT(copied != 0);
+       ASSERT(copied == ip->i_d.di_nextents);
+       ASSERT((copied * (uint)sizeof(xfs_bmbt_rec_t)) <= XFS_IFORK_DSIZE(ip));
+       xfs_validate_extents(buffer, copied, fmt);
+
+       return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+}                
+
+/*
+ * Copy the in-core contents of one inode fork into the on-disk inode
+ * dip, according to the fork's current format and the fields recorded
+ * in the inode log item.
+ *
+ * Each of the following cases stores data into the same region
+ * of the on-disk inode, so only one of them can be valid at
+ * any given time. While it is possible to have conflicting formats
+ * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
+ * in EXTENTS format, this can only happen when the fork has
+ * changed formats after being modified but before being flushed.
+ * In these cases, the format always takes precedence, because the
+ * format indicates the current state of the fork.
+ *
+ * ip -- the in-core inode being flushed
+ * dip -- the on-disk inode to copy the fork into
+ * iip -- the inode's log item; if NULL nothing is copied
+ * whichfork -- the fork to flush; also used to index the per-fork
+ *      flag tables below
+ * bp -- not referenced in this function body
+ *
+ * Returns 0, except that EFSCORRUPTED is returned when
+ * XFS_DIR_SHORTFORM_VALIDATE_ONDISK() reports a problem for a
+ * local-format data fork.
+ */
+STATIC int
+xfs_iflush_fork(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip,
+       xfs_inode_log_item_t    *iip,
+       int                     whichfork,
+       xfs_buf_t               *bp)
+{
+       char                    *cp;
+       xfs_ifork_t             *ifp;
+       xfs_mount_t             *mp;
+#ifdef XFS_TRANS_DEBUG
+       int                     first;
+#endif
+       static const short      brootflag[2] =
+               { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+       static const short      dataflag[2] =
+               { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
+       static const short      extflag[2] =
+               { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+
+       if (iip == NULL)
+               return 0;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       /*
+        * This can happen if we gave up in iformat in an error path,
+        * for the attribute fork.
+        */
+       if (ifp == NULL) {
+               ASSERT(whichfork == XFS_ATTR_FORK);
+               return 0;
+       }
+       cp = XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+       mp = ip->i_mount;
+       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
+               if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
+                   (ifp->if_bytes > 0)) {
+                       ASSERT(ifp->if_u1.if_data != NULL);
+                       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+                       bcopy(ifp->if_u1.if_data, cp, ifp->if_bytes);
+               }
+               if (whichfork == XFS_DATA_FORK) {
+                       if (XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip)) {
+                               return XFS_ERROR(EFSCORRUPTED);
+                       }
+               }
+               break;
+
+       case XFS_DINODE_FMT_EXTENTS:
+               ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
+                      !(iip->ili_format.ilf_fields & extflag[whichfork]));
+               ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0));
+               ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0));
+               if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
+                   (ifp->if_bytes > 0)) {
+                       ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
+                       (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_32_t *)cp,
+                               whichfork);
+               }
+               break;
+
+       case XFS_DINODE_FMT_BTREE:
+               if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
+                   (ifp->if_broot_bytes > 0)) {
+                       ASSERT(ifp->if_broot != NULL);
+                       ASSERT(ifp->if_broot_bytes <=
+                              (XFS_IFORK_SIZE(ip, whichfork) +
+                               XFS_BROOT_SIZE_ADJ));
+                       xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes,
+                               (xfs_bmdr_block_t *)cp,
+                               XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT));
+               }
+               break;
+
+       case XFS_DINODE_FMT_DEV:
+               if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev);
+               }
+               break;
+               
+       case XFS_DINODE_FMT_UUID:
+               if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       bcopy(&ip->i_df.if_u2.if_uuid, &dip->di_u.di_muuid,
+                               sizeof(uuid_t));
+               }
+               break;
+
+       default:
+               ASSERT(0);
+               break;
+       }
+
+       return 0;
+}
diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c
new file mode 100644 (file)
index 0000000..c4de3b9
--- /dev/null
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Derive the mount fields that depend only on the superblock
+ * contents: shift counts and masks, the default attribute fork
+ * offset, the per-btree record limits and the inode allocation
+ * sizes.
+ */
+void
+xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
+{
+       int     n;
+
+       /* Both rotors start out at zero. */
+       mp->m_agfrotor = 0;
+       mp->m_agirotor = 0;
+
+       /* Shift counts and masks that follow from the block size. */
+       mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+       mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+       mp->m_blockmask = sbp->sb_blocksize - 1;
+       mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+       mp->m_blockwmask = mp->m_blockwsize - 1;
+
+       /* Quantities that follow from the AG and inode geometry. */
+       mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
+       mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+       mp->m_litino = sbp->sb_inodesize -
+               ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
+
+       /*
+        * Default attribute fork offset, used when an inode gets
+        * attributes for the first time; inodes that already have
+        * attributes carry their own per-inode value.
+        */
+       ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
+       if (sbp->sb_inodesize == 256) {
+               mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2);
+       } else if (sbp->sb_inodesize == 512 ||
+                  sbp->sb_inodesize == 1024 ||
+                  sbp->sb_inodesize == 2048) {
+               mp->m_attroffset = XFS_BMDR_SPACE_CALC(12);
+       } else {
+               ASSERT(0);
+       }
+       ASSERT(mp->m_attroffset < XFS_LITINO(mp));
+
+       /*
+        * Max/min record counts for the allocation, bmap and inode
+        * btrees.  Slot 0 is computed with the macro's third argument
+        * true, slot 1 with it false.
+        */
+       for (n = 0; n < 2; n++) {
+               mp->m_alloc_mxr[n] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                       xfs_alloc, n == 0);
+               mp->m_alloc_mnr[n] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                       xfs_alloc, n == 0);
+               mp->m_bmap_dmxr[n] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                       xfs_bmbt, n == 0);
+               mp->m_bmap_dmnr[n] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                       xfs_bmbt, n == 0);
+               mp->m_inobt_mxr[n] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+                       xfs_inobt, n == 0);
+               mp->m_inobt_mnr[n] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+                       xfs_inobt, n == 0);
+       }
+
+       /* One fs block in basic blocks, and the inode allocation unit. */
+       mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+       mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock);
+       mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+}
+
+static struct {     /* Layout table for xfs_sb_t, consumed by xfs_xlatesb().
+                     * That routine indexes the table with a field number f
+                     * and takes the field's size to be
+                     * xfs_sb_info[f + 1].offset - xfs_sb_info[f].offset,
+                     * so every entry must be followed directly by the
+                     * entry for the next field of the structure.
+                     */
+    short offset;   /* byte offset of the field within xfs_sb_t */
+    short type;     /* 0 = integer (translated with INT_XLATE by
+                     *     xfs_xlatesb() unless the arch is
+                     *     ARCH_NOCONVERT or the field is one byte wide)
+                     * 1 = binary / string (no translation)
+                     */
+} xfs_sb_info[] = {
+    { offsetof(xfs_sb_t, sb_magicnum),   0 },
+    { offsetof(xfs_sb_t, sb_blocksize),  0 },
+    { offsetof(xfs_sb_t, sb_dblocks),    0 },
+    { offsetof(xfs_sb_t, sb_rblocks),    0 },
+    { offsetof(xfs_sb_t, sb_rextents),   0 },
+    { offsetof(xfs_sb_t, sb_uuid),       1 },
+    { offsetof(xfs_sb_t, sb_logstart),   0 },
+    { offsetof(xfs_sb_t, sb_rootino),    0 },
+    { offsetof(xfs_sb_t, sb_rbmino),     0 },
+    { offsetof(xfs_sb_t, sb_rsumino),    0 },
+    { offsetof(xfs_sb_t, sb_rextsize),   0 },
+    { offsetof(xfs_sb_t, sb_agblocks),   0 },
+    { offsetof(xfs_sb_t, sb_agcount),    0 },
+    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
+    { offsetof(xfs_sb_t, sb_logblocks),  0 },
+    { offsetof(xfs_sb_t, sb_versionnum), 0 },
+    { offsetof(xfs_sb_t, sb_sectsize),   0 },
+    { offsetof(xfs_sb_t, sb_inodesize),  0 },
+    { offsetof(xfs_sb_t, sb_inopblock),  0 },
+    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
+    { offsetof(xfs_sb_t, sb_blocklog),   0 },
+    { offsetof(xfs_sb_t, sb_sectlog),    0 },
+    { offsetof(xfs_sb_t, sb_inodelog),   0 },
+    { offsetof(xfs_sb_t, sb_inopblog),   0 },
+    { offsetof(xfs_sb_t, sb_agblklog),   0 },
+    { offsetof(xfs_sb_t, sb_rextslog),   0 },
+    { offsetof(xfs_sb_t, sb_inprogress), 0 },
+    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
+    { offsetof(xfs_sb_t, sb_icount),     0 },
+    { offsetof(xfs_sb_t, sb_ifree),      0 },
+    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
+    { offsetof(xfs_sb_t, sb_frextents),  0 },
+    { offsetof(xfs_sb_t, sb_uquotino),   0 },
+    { offsetof(xfs_sb_t, sb_pquotino),   0 },
+    { offsetof(xfs_sb_t, sb_qflags),     0 },
+    { offsetof(xfs_sb_t, sb_flags),      0 },
+    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
+    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
+    { offsetof(xfs_sb_t, sb_unit),       0 },
+    { offsetof(xfs_sb_t, sb_width),      0 },
+    { offsetof(xfs_sb_t, sb_dirblklog),  0 },
+    { offsetof(xfs_sb_t, sb_dummy),      1 },
+    { sizeof(xfs_sb_t),                  0 }    /* end marker: supplies the end offset used to size sb_dummy */
+};
+
+/*
+ * xfs_xlatesb
+ *     Move the selected superblock fields between the on-disk image
+ *     and the in-core structure, one field per set bit in "fields".
+ *     Field offsets and sizes come from the xfs_sb_info[] table.
+ *
+ *     data       - on disk version of sb
+ *     sb         - a superblock
+ *     dir        - conversion direction: <0 - convert sb to buf
+ *                                        >0 - convert buf to sb
+ *     arch       - architecture to read/write from/to buf
+ *     fields     - which fields to copy (bitmask)
+ */
+void
+xfs_xlatesb(void *data, xfs_sb_t *sb, int dir, xfs_arch_t arch,
+            __int64_t fields)
+{
+       xfs_caddr_t     disk = (xfs_caddr_t)data;
+       xfs_caddr_t     incore = (xfs_caddr_t)sb;
+
+       ASSERT(dir);
+       ASSERT(fields);
+
+       /* Handle one field per pass, lowest set bit first. */
+       while (fields) {
+               xfs_sb_field_t  fld;    /* field number being handled */
+               xfs_caddr_t     dp;     /* the field in the disk image */
+               xfs_caddr_t     ip;     /* the field in the in-core sb */
+               int             len;    /* width of the field in bytes */
+               int             raw;    /* plain byte copy is enough */
+
+               fld = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
+               dp = disk + xfs_sb_info[fld].offset;
+               ip = incore + xfs_sb_info[fld].offset;
+               /* The next table entry's offset marks where this field ends. */
+               len = xfs_sb_info[fld + 1].offset - xfs_sb_info[fld].offset;
+
+               ASSERT(xfs_sb_info[fld].type == 0 || xfs_sb_info[fld].type == 1);
+
+               raw = (arch == ARCH_NOCONVERT || len == 1 ||
+                      xfs_sb_info[fld].type == 1);
+
+               if (raw) {
+                       if (dir > 0)
+                               bcopy(dp, ip, len);
+                       else
+                               bcopy(ip, dp, len);
+               } else if (len == 2) {
+                       INT_XLATE(*(__uint16_t *)dp, *(__uint16_t *)ip,
+                                 dir, arch);
+               } else if (len == 4) {
+                       INT_XLATE(*(__uint32_t *)dp, *(__uint32_t *)ip,
+                                 dir, arch);
+               } else if (len == 8) {
+                       INT_XLATE(*(__uint64_t *)dp, *(__uint64_t *)ip,
+                                 dir, arch);
+               } else {
+                       /* Integer fields are expected to be 1, 2, 4 or 8 bytes. */
+                       ASSERT(0);
+               }
+
+               fields &= ~(1LL << fld);
+       }
+}
diff --git a/libxfs/xfs_rtalloc.c b/libxfs/xfs_rtalloc.c
new file mode 100644 (file)
index 0000000..8f0a447
--- /dev/null
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Free realtime space allocation for XFS.
+ */
+#include <xfs.h>
+
+
+/*
+ * Read one block of the realtime bitmap file or of the realtime
+ * summary file.  The file-relative block number is first mapped
+ * through the data fork of the owning inode, then the buffer is read
+ * within the given transaction.  *bpp is written only on success.
+ */
+STATIC int                             /* error */
+xfs_rtbuf_get(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   block,          /* block number in bitmap or summary */
+       int             issum,          /* is summary not bitmap */
+       xfs_buf_t       **bpp)          /* output: buffer for the block */
+{
+       xfs_buf_t       *buf;           /* buffer handed back to the caller */
+       int             err;            /* status of the helper calls */
+       xfs_fsblock_t   fsbno;          /* fs block backing the file block */
+       xfs_inode_t     *rtip;          /* inode of the file being read */
+
+       /* Choose the summary inode or the bitmap inode. */
+       if (issum)
+               rtip = mp->m_rsumip;
+       else
+               rtip = mp->m_rbmip;
+
+       /* File offset (in blocks) -> file system block. */
+       err = xfs_bmapi_single(tp, rtip, XFS_DATA_FORK, &fsbno, block);
+       if (err)
+               return err;
+       ASSERT(fsbno != NULLFSBLOCK);
+
+       /* File system block -> disk address, then read it. */
+       err = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                XFS_FSB_TO_DADDR(mp, fsbno),
+                                mp->m_bsize, 0, &buf);
+       if (err)
+               return err;
+       ASSERT(buf && !XFS_BUF_GETERROR(buf));
+
+       *bpp = buf;
+       return 0;
+}
+
+/*
+ * Searching backward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ *
+ * The realtime bitmap is examined a word at a time, from the word
+ * holding start down towards limit.  The inspected length is computed
+ * as start - limit + 1, so limit is expected to be <= start.
+ *
+ * On success 0 is returned and *rtblock is set to the block just
+ * above the first differing block, i.e. the lowest block of the run
+ * that shares start's state (this assumes XFS_RTHIBIT yields the index
+ * of the highest set bit -- the macro is defined elsewhere).  If no
+ * block in the range differs, *rtblock is set to limit.
+ *
+ * An error from xfs_rtbuf_get() is returned as is; *rtblock is not
+ * written in that case.  Every bitmap buffer obtained here is released
+ * with xfs_trans_brelse() before returning.
+ */
+STATIC int                             /* error */
+xfs_rtfind_back(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to look at */
+       xfs_rtblock_t   limit,          /* last block to look at */
+       xfs_rtblock_t   *rtblock)       /* out: lowest block in start's state */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t               *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtblock_t   firstbit;       /* first useful bit in the word */
+       xfs_rtblock_t   i;              /* current bit number rel. to start */
+       xfs_rtblock_t   len;            /* length of inspected area */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       xfs_rtword_t    want;           /* mask for "good" values */
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute and read in starting bitmap block for starting block.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       /*
+        * Get the first word's index & point to it.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       len = start - limit + 1;
+       /*
+        * Compute match value, based on the bit at start: if 1 (free)
+        * then all-ones, else all-zeroes.
+        */
+       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+       /*
+        * For a backward scan the starting position is "word-aligned"
+        * when start is the highest bit of its word.  If it is not,
+        * deal with the partial word first.
+        */
+       if (bit < XFS_NBWORD - 1) {
+               /*
+                * Calculate first (leftmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
+               mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
+                       firstbit;
+               /*
+                * Calculate the difference between the value there
+                * and what we're looking for.
+                */
+               if (wdiff = (*b ^ want) & mask) {
+                       /*
+                        * Different.  Mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i = bit - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               }
+               i = bit - firstbit + 1;
+               /*
+                * Go on to previous block if that's where the previous word is
+                * and we need the previous word.
+                */
+               if (--word == -1 && i < len) {
+                       /*
+                        * If done with this block, get the previous one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = XFS_BLOCKWMASK(mp);
+                       b = &bufp[word];
+               } else {
+                       /*
+                        * Go on to the previous word in the buffer.
+                        */
+                       b--;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the previous one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if (wdiff = *b ^ want) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               }
+               i += XFS_NBWORD;
+               /*
+                * Go on to previous block if that's where the previous word is
+                * and we need the previous word.
+                */
+               if (--word == -1 && i < len) {
+                       /*
+                        * If done with this block, get the previous one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = XFS_BLOCKWMASK(mp);
+                       b = &bufp[word];
+               } else {
+                       /*
+                        * Go on to the previous word in the buffer.
+                        */
+                       b--;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if (len - i) {
+               /*
+                * Calculate first (leftmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               firstbit = XFS_NBWORD - (len - i);
+               mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if (wdiff = (*b ^ want) & mask) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               } else
+                       i = len;
+       }
+       /*
+        * No match, return that we scanned the whole area.
+        * Here i == len, so the value stored is limit.
+        */
+       xfs_trans_brelse(tp, bp);
+       *rtblock = start - i + 1;
+       return 0;
+}
+
+/*
+ * Searching forward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ *
+ * The realtime bitmap is examined a word at a time, from the word
+ * holding start up towards limit.  The inspected length is computed
+ * as limit - start + 1, so limit is expected to be >= start.
+ *
+ * On success 0 is returned and *rtblock is set to the block just
+ * below the first differing block, i.e. the highest block of the run
+ * that shares start's state (this assumes XFS_RTLOBIT yields the index
+ * of the lowest set bit -- the macro is defined elsewhere).  If no
+ * block in the range differs, *rtblock is set to limit.
+ *
+ * An error from xfs_rtbuf_get() is returned as is; *rtblock is not
+ * written in that case.  Every bitmap buffer obtained here is released
+ * with xfs_trans_brelse() before returning.
+ */
+STATIC int                             /* error */
+xfs_rtfind_forw(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to look at */
+       xfs_rtblock_t   limit,          /* last block to look at */
+       xfs_rtblock_t   *rtblock)       /* out: highest block in start's state */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t               *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtblock_t   i;              /* current bit number rel. to start */
+       xfs_rtblock_t   lastbit;        /* last useful bit in the word */
+       xfs_rtblock_t   len;            /* length of inspected area */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       xfs_rtword_t    want;           /* mask for "good" values */
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute and read in starting bitmap block for starting block.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       /*
+        * Get the first word's index & point to it.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       len = limit - start + 1;
+       /*
+        * Compute match value, based on the bit at start: if 1 (free)
+        * then all-ones, else all-zeroes.
+        */
+       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+       /*
+        * If the starting position is not word-aligned, deal with the
+        * partial word.
+        */
+       if (bit) {
+               /*
+                * Calculate last (rightmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+               /*
+                * Calculate the difference between the value there
+                * and what we're looking for.
+                */
+               if (wdiff = (*b ^ want) & mask) {
+                       /*
+                        * Different.  Mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i = XFS_RTLOBIT(wdiff) - bit;
+                       *rtblock = start + i - 1;
+                       return 0;
+               }
+               i = lastbit - bit;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the next one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if (wdiff = *b ^ want) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *rtblock = start + i - 1;
+                       return 0;
+               }
+               i += XFS_NBWORD;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if (lastbit = len - i) {
+               /*
+                * Calculate mask for all the relevant bits in this word.
+                */
+               mask = ((xfs_rtword_t)1 << lastbit) - 1;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if (wdiff = (*b ^ want) & mask) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *rtblock = start + i - 1;
+                       return 0;
+               } else
+                       i = len;
+       }
+       /*
+        * No match, return that we scanned the whole area.
+        * Here i == len, so the value stored is limit.
+        */
+       xfs_trans_brelse(tp, bp);
+       *rtblock = start + i - 1;
+       return 0;
+}
+
+/*
+ * Mark an extent specified by start and len freed.
+ * Updates all the summary information as well as the bitmap.
+ *
+ * The bitmap bits for [start, start + len - 1] are set first.  The
+ * free run that now contains the range is then located by scanning
+ * backward from start and forward from the end of the range, and the
+ * summary is adjusted: entries for any free runs that already
+ * bordered the range are adjusted by -1, and an entry for the merged
+ * free run is adjusted by +1.
+ *
+ * rbpp and rsb are handed through unchanged to
+ * xfs_rtmodify_summary().  Returns 0 on success or the first error
+ * reported by any of the helpers.
+ */
+STATIC int                             /* error */
+xfs_rtfree_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to free */
+       xfs_extlen_t    len,            /* length to free */
+       xfs_buf_t               **rbpp,         /* in/out: summary block buffer */
+       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+{
+       xfs_rtblock_t   end;            /* end of the freed extent */
+       int             error;          /* error value */
+       xfs_rtblock_t   postblock;      /* last block of the merged free run */
+       xfs_rtblock_t   preblock;       /* first block of the merged free run */
+
+       end = start + len - 1;
+       /*
+        * Modify the bitmap to mark this extent freed.
+        */
+       error = xfs_rtmodify_range(mp, tp, start, len, 1);
+       if (error) {
+               return error;
+       }
+       /*
+        * Assume we're freeing out of the middle of an allocated extent.
+        * We need to find the beginning and end of the extent so we can
+        * properly update the summary.
+        */
+       error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+       if (error) {
+               return error;
+       }
+       /*
+        * Find the next allocated block (end of allocated extent).
+        * The result must be checked: on failure postblock is not
+        * written, and the error would otherwise be overwritten by the
+        * summary updates below.
+        */
+       error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
+               &postblock);
+       if (error) {
+               return error;
+       }
+       /*
+        * A free run [preblock, start - 1] already sat in front of the
+        * range.  It is being absorbed into the larger free extent, so
+        * adjust the summary entry for its old length by -1.
+        */
+       if (preblock < start) {
+               error = xfs_rtmodify_summary(mp, tp,
+                       XFS_RTBLOCKLOG(start - preblock),
+                       XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+               if (error) {
+                       return error;
+               }
+       }
+       /*
+        * A free run [end + 1, postblock] already followed the range.
+        * It is being absorbed as well, so adjust the summary entry
+        * for its old length by -1.
+        */
+       if (postblock > end) {
+               error = xfs_rtmodify_summary(mp, tp,
+                       XFS_RTBLOCKLOG(postblock - end),
+                       XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
+               if (error) {
+                       return error;
+               }
+       }
+       /*
+        * Increment the summary information corresponding to the entire
+        * (new) free extent.
+        */
+       error = xfs_rtmodify_summary(mp, tp,
+               XFS_RTBLOCKLOG(postblock + 1 - preblock),
+               XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
+       return error;
+}
+
+/*
+ * Set the given range of bitmap bits to the given value.
+ * Do whatever I/O and logging is required.
+ */
+STATIC int                             /* error */
+xfs_rtmodify_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to modify */
+       xfs_extlen_t    len,            /* length of extent to modify */
+       int             val)            /* 1 for free, 0 for allocated */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t               *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtword_t    *first;         /* first used word in the buffer */
+       int             i;              /* current bit number rel. to start */
+       int             lastbit;        /* last useful bit in word */
+       xfs_rtword_t    mask;           /* mask o frelevant bits for value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute starting bitmap block number.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       /*
+        * Read the bitmap block, and point to its data.
+        */
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       /*
+        * Compute the starting word's address, and starting bit.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       first = b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       /*
+        * 0 (allocated) => all zeroes; 1 (free) => all ones.
+        */
+       val = -val;
+       /*
+        * If not starting on a word boundary, deal with the first
+        * (partial) word.
+        */
+       if (bit) {
+               /*
+                * Compute first bit not changed and mask of relevant bits.
+                */
+               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+               /*
+                * Set/clear the active bits.
+                */
+               if (val)
+                       *b |= mask;
+               else
+                       *b &= ~mask;
+               i = lastbit - bit;
+               /*
+                * Go on to the next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * Log the changed part of this block.
+                        * Get the next one.
+                        */
+                       xfs_trans_log_buf(tp, bp,
+                               (uint)((char *)first - (char *)bufp),
+                               (uint)((char *)b - (char *)bufp));
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer
+                        */
+                       b++;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the next one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Set the word value correctly.
+                */
+               *b = val;
+               i += XFS_NBWORD;
+               /*
+                * Go on to the next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * Log the changed part of this block.
+                        * Get the next one.
+                        */
+                       xfs_trans_log_buf(tp, bp,
+                               (uint)((char *)first - (char *)bufp),
+                               (uint)((char *)b - (char *)bufp));
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer
+                        */
+                       b++;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if (lastbit = len - i) {
+               /*
+                * Compute a mask of relevant bits.
+                */
+               bit = 0;
+               mask = ((xfs_rtword_t)1 << lastbit) - 1;
+               /*
+                * Set/clear the active bits.
+                */
+               if (val)
+                       *b |= mask;
+               else
+                       *b &= ~mask;
+               b++;
+       }
+       /*
+        * Log any remaining changed bytes.
+        */
+       if (b > first)
+               xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
+                       (uint)((char *)b - (char *)bufp - 1));
+       return 0;
+}
+
+/*
+ * Add delta to the summary counter for a given (log2 extent size,
+ * bitmap block) pair and log the modified bytes in the transaction.
+ * When rbpp is non-NULL the summary buffer and its block number are
+ * cached in *rbpp / *rsb so the next call can reuse the buffer.
+ */
+STATIC int                             /* error */
+xfs_rtmodify_summary(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       int             log,            /* log2 of extent size */
+       xfs_rtblock_t   bbno,           /* bitmap block number */
+       int             delta,          /* change to make to summary info */
+       xfs_buf_t               **rbpp,         /* in/out: summary block buffer, may be NULL */
+       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+{
+       xfs_buf_t               *bp;            /* buffer for the summary block */
+       int             error;          /* error value */
+       xfs_fsblock_t   sb;             /* summary fsblock */
+       int             so;             /* index into the summary file */
+       xfs_suminfo_t   *sp;            /* pointer to returned data */
+
+       /*
+        * Compute entry number in the summary file.
+        */
+       so = XFS_SUMOFFS(mp, log, bbno);
+       /*
+        * Compute the block number in the summary file.
+        */
+       sb = XFS_SUMOFFSTOBLOCK(mp, so);
+       /*
+        * If we have an old buffer, and the block number matches, use that.
+        */
+       if (rbpp && *rbpp && *rsb == sb)
+               bp = *rbpp;
+       /*
+        * Otherwise we have to get the buffer.
+        */
+       else {
+               /*
+                * If there was an old one, get rid of it first.
+                */
+               if (rbpp && *rbpp)
+                       xfs_trans_brelse(tp, *rbpp);
+               error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
+               if (error) {
+                       return error;
+               }
+               /*
+                * Remember this buffer and block for the next call.
+                */
+               if (rbpp) {
+                       *rbpp = bp;
+                       *rsb = sb;
+               }
+       }
+       /*
+        * Point to the summary entry, add delta, and log just that entry.
+        */
+       sp = XFS_SUMPTR(mp, bp, so);
+       *sp += delta;
+       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
+               (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+       return 0;
+}
+
+/*
+ * Free an extent in the realtime subvolume.  Length is expressed in
+ * realtime extents, as is the block number.
+ */
+int                                    /* error */
+xfs_rtfree_extent(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   bno,            /* starting block number to free */
+       xfs_extlen_t    len)            /* length of extent freed */
+{
+       int             error;          /* error value */
+       xfs_inode_t     *ip;            /* bitmap file inode */
+       xfs_mount_t     *mp;            /* file system mount structure */
+       xfs_fsblock_t   sb;             /* summary file block number */
+       xfs_buf_t               *sumbp;         /* summary file block buffer */
+
+       mp = tp->t_mountp;
+       /*
+        * Synchronize by locking the bitmap inode.
+        */
+       error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, XFS_ILOCK_EXCL, &ip);
+       if (error) {
+               return error;
+       }
+#if defined(__KERNEL__) && defined(DEBUG)
+       /*
+        * Check to see that this whole range is currently allocated.
+        */
+       {
+               int     stat;           /* result from checking range */
+
+               error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat);
+               if (error) {
+                       return error;
+               }
+               ASSERT(stat);
+       }
+#endif
+       sumbp = NULL;
+       /*
+        * Free the range of realtime blocks.
+        */
+       error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
+       if (error) {
+               return error;
+       }
+       /*
+        * Mark more blocks free in the superblock.
+        */
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+       /*
+        * If every rt extent is now free, make sure XFS_DIFLAG_NEWRTBM is set
+        * and reset the file sequence number (stored over di_atime) to 0.
+        */
+       if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+           mp->m_sb.sb_rextents) {
+               if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
+                       ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
+               *(__uint64_t *)&ip->i_d.di_atime = 0;
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       }
+       return 0;
+}
+
+/*
+ * Initialize realtime fields in the mount structure; returns 0 or an error.
+ */
+int                            /* error */
+xfs_rtmount_init(
+       xfs_mount_t     *mp)    /* file system mount structure */
+{
+       xfs_buf_t       *bp;    /* buffer for last block of subvolume */
+       xfs_daddr_t     d;      /* address of last block of subvolume */
+       int             error;  /* error return value */
+       xfs_sb_t        *sbp;   /* filesystem superblock copy in mount */
+
+       sbp = &mp->m_sb;
+       if (sbp->sb_rblocks == 0)
+               return 0;       /* no realtime blocks: nothing to set up */
+       if (!mp->m_rtdev) {
+               printk(KERN_WARNING
+               "XFS: This FS has an RT subvol - specify -o rtdev on mount\n");
+               return XFS_ERROR(ENODEV);
+       }
+       mp->m_rsumlevels = sbp->sb_rextslog + 1;
+       mp->m_rsumsize =
+               (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
+               sbp->sb_rbmblocks;
+       mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
+       mp->m_rbmip = mp->m_rsumip = NULL;
+       /*
+        * Check the rt size: FSB->BB->FSB must round-trip and d - 1 be readable.
+        */
+       d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+       if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
+               printk(KERN_WARNING "XFS: RT mount - %llu != %llu\n",
+                       XFS_BB_TO_FSB(mp, d), mp->m_sb.sb_rblocks);
+               return XFS_ERROR(E2BIG);
+       }
+       error = xfs_read_buf(mp, &mp->m_rtdev_targ, d - 1, 1, 0, &bp);
+       if (error) {
+               printk(KERN_WARNING
+                       "XFS: RT mount - xfs_read_buf returned %d\n", error);
+               if (error == ENOSPC)
+                       return XFS_ERROR(E2BIG);
+               return error;
+       }
+       xfs_buf_relse(bp);
+       return 0;
+}
diff --git a/libxfs/xfs_rtbit.c b/libxfs/xfs_rtbit.c
new file mode 100644 (file)
index 0000000..c51cba3
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS bit manipulation routines, used only in realtime code.
+ */
+
+#include <xfs.h>
+
+/*
+ * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
+ */
+int
+xfs_lowbit32(
+       __uint32_t      v)
+{
+       int             i;      /* bit offset of the lowest nonzero byte of v */
+
+       if (v & 0x0000ffff) {                   /* something set in the low half */
+               if (v & 0x000000ff)
+                       i = 0;
+               else
+                       i = 8;
+       } else if (v & 0xffff0000) {            /* only high-half bits are set */
+               if (v & 0x00ff0000)
+                       i = 16;
+               else
+                       i = 24;
+       } else
+               return -1;                      /* v == 0: no bit set */
+       /* xfs_lowbit[] is defined elsewhere; presumably low set bit per byte value */
+       return i + xfs_lowbit[(v >> i) & 0xff];
+}
diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c
new file mode 100644 (file)
index 0000000..a30ad89
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Fill mp->m_reservations with the precomputed per-transaction log
+ * reservation sizes; many entries also include XFS_DQUOT_LOGRES(mp).
+ */
+void
+xfs_trans_init(
+       xfs_mount_t     *mp)
+{
+       xfs_trans_reservations_t        *resp;
+
+       resp = &(mp->m_reservations);
+       resp->tr_write =
+               (uint)(XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_itruncate =
+               (uint)(XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_rename =
+               (uint)(XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_link = (uint)XFS_CALC_LINK_LOG_RES(mp);
+       resp->tr_remove =
+               (uint)(XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_symlink =
+               (uint)(XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_create =
+               (uint)(XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_mkdir =
+               (uint)(XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_ifree =
+               (uint)(XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_ichange =
+               (uint)(XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_growdata = (uint)XFS_CALC_GROWDATA_LOG_RES(mp);
+       resp->tr_swrite = (uint)XFS_CALC_SWRITE_LOG_RES(mp);
+       resp->tr_writeid = (uint)XFS_CALC_WRITEID_LOG_RES(mp);
+       resp->tr_addafork =
+               (uint)(XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_attrinval = (uint)XFS_CALC_ATTRINVAL_LOG_RES(mp);
+       resp->tr_attrset =
+               (uint)(XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_attrrm =
+               (uint)(XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp));
+       resp->tr_clearagi = (uint)XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+       resp->tr_growrtalloc = (uint)XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+       resp->tr_growrtzero = (uint)XFS_CALC_GROWRTZERO_LOG_RES(mp);
+       resp->tr_growrtfree = (uint)XFS_CALC_GROWRTFREE_LOG_RES(mp);
+}
diff --git a/logprint/Makefile b/logprint/Makefile
new file mode 100644 (file)
index 0000000..4b878e2
--- /dev/null
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_logprint
+CMDDEPS = $(LIBXFS)
+
+CFILES = log_print_trans.c log_print_all.c log_misc.c logprint.c \
+       xfs_log_recover.c
+HFILES = logprint.h
+LLDLIBS        = $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/logprint/log_misc.c b/logprint/log_misc.c
new file mode 100644 (file)
index 0000000..bc53bfa
--- /dev/null
@@ -0,0 +1,1184 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+#define ZEROED_LOG     (-4)
+#define FULL_READ      (-3)
+#define PARTIAL_READ   (-2)
+#define BAD_HEADER     (-1)
+#define NO_ERROR       (0)
+
+static int logBBsize;
+char *trans_type[] = { /* names indexed by th_type in xlog_print_trans_header() */
+       "",             /* index 0 has an empty name */
+       "SETATTR",
+       "SETATTR_SIZE",
+       "INACTIVE",
+       "CREATE",
+       "CREATE_TRUNC",
+       "TRUNCATE_FILE",
+       "REMOVE",
+       "LINK",
+       "RENAME",
+       "MKDIR",
+       "RMDIR",
+       "SYMLINK",
+       "SET_DMATTRS",
+       "GROWFS",
+       "STRAT_WRITE",
+       "DIOSTRAT",
+       "WRITE_SYNC",
+       "WRITEID",
+       "ADDAFORK",
+       "ATTRINVAL",
+       "ATRUNCATE",
+       "ATTR_SET",
+       "ATTR_RM",
+       "ATTR_FLAG",
+       "CLEAR_AGI_BUCKET",
+       "QM_SBCHANGE",
+       "DUMMY1",
+       "DUMMY2",
+       "QM_QUOTAOFF",
+       "QM_DQALLOC",
+       "QM_SETQLIM",
+       "QM_DQCLUSTER",
+       "QM_QINOCREATE",
+       "QM_QUOTAOFF_END",
+       "SB_UNIT",
+       "FSYNC_TS",
+       "GROWFSRT_ALLOC",
+       "GROWFSRT_ZERO",
+       "GROWFSRT_FREE",
+       "SWAPEXT",
+};
+
+typedef struct xlog_split_item {
+       struct xlog_split_item  *si_next;       /* next item on split_list */
+       struct xlog_split_item  *si_prev;       /* previous item; 0 at the list head */
+       xlog_tid_t              si_tid;         /* transaction id this item tracks */
+       int                     si_skip;        /* decremented by xlog_print_find_tid(); item freed at 0 */
+} xlog_split_item_t;
+
+xlog_split_item_t *split_list = 0;     /* list head; xlog_print_add_to_trans() pushes at the front */
+
+void
+print_xlog_op_line(void)
+{      /* writes one row of '-' characters followed by a newline to stdout */
+    printf("--------------------------------------"
+           "--------------------------------------\n");
+}      /* print_xlog_op_line */
+
+void
+print_xlog_record_line(void)
+{      /* writes one row of '=' characters followed by a newline to stdout */
+    printf("======================================"
+           "======================================\n");
+}      /* print_xlog_record_line */
+
+void
+print_stars(void)
+{      /* writes one row of '*' characters followed by a newline to stdout */
+    printf("***********************************"
+           "***********************************\n");
+}      /* print_stars */
+
+/*
+ * Print the data at op_head as a log operation header (labelled with
+ * index i) and advance *ptr past it by sizeof(xlog_op_header_t).
+ */
+void
+xlog_print_op_header(xlog_op_header_t  *op_head,
+                    int                i,
+                    xfs_caddr_t        *ptr)
+{
+    xlog_op_header_t hbuf;
+
+    /*
+     * bcopy because on 64/n32, partial reads can cause the op_head
+     * pointer to come in pointing to an odd-numbered byte
+     */
+    bcopy(op_head, &hbuf, sizeof(xlog_op_header_t));
+    op_head = &hbuf;   /* from here on read only the aligned local copy */
+    *ptr += sizeof(xlog_op_header_t);
+    printf("Oper (%d): tid: %x  len: %d  clientid: %s  ", i,
+           INT_GET(op_head->oh_tid, ARCH_CONVERT),
+           INT_GET(op_head->oh_len, ARCH_CONVERT),
+           (op_head->oh_clientid == XFS_TRANSACTION ? "TRANS" :
+           (op_head->oh_clientid == XFS_LOG ? "LOG" : "ERROR")));
+    printf("flags: ");
+    if (op_head->oh_flags) {   /* print the name of every flag bit that is set */
+       if (op_head->oh_flags & XLOG_START_TRANS)
+           printf("START ");
+       if (op_head->oh_flags & XLOG_COMMIT_TRANS)
+           printf("COMMIT ");
+       if (op_head->oh_flags & XLOG_WAS_CONT_TRANS)
+           printf("WAS_CONT ");
+       if (op_head->oh_flags & XLOG_UNMOUNT_TRANS)
+           printf("UNMOUNT ");
+       if (op_head->oh_flags & XLOG_CONTINUE_TRANS)
+           printf("CONTINUE ");
+       if (op_head->oh_flags & XLOG_END_TRANS)
+           printf("END ");
+    } else {
+       printf("none");
+    }
+    printf("\n");
+}      /* xlog_print_op_header */
+
+
+void
+xlog_print_add_to_trans(xlog_tid_t     tid,
+                       int             skip)
+{      /* pushes a new (tid, skip) item onto the front of split_list */
+    xlog_split_item_t *item;
+
+    item         = (xlog_split_item_t *)calloc(sizeof(xlog_split_item_t), 1);
+    item->si_tid  = tid;       /* NOTE(review): calloc result is used without a NULL check */
+    item->si_skip = skip;
+    item->si_next = split_list;
+    item->si_prev = 0;
+    if (split_list)
+       split_list->si_prev = item;
+    split_list   = item;
+}      /* xlog_print_add_to_trans */
+
+
+int
+xlog_print_find_tid(xlog_tid_t tid, uint was_cont)
+{      /* returns 1 if tid is on split_list (consuming one skip), else see below */
+    xlog_split_item_t *listp = split_list;
+
+    if (!split_list) { /* empty list: the answer depends only on was_cont */
+       if (was_cont != 0)      /* Not first time we have used this tid */
+           return 1;
+       else
+           return 0;
+    }
+    while (listp) {    /* linear search for the item carrying this tid */
+       if (listp->si_tid == tid)
+           break;
+       listp = listp->si_next;
+    }
+    if (!listp)  {     /* tid is not on the list */
+       return 0;
+    }
+    if (--listp->si_skip == 0) {       /* last skip consumed: unlink and free the item */
+       if (listp == split_list) {              /* delete at head */
+           split_list = listp->si_next;
+           if (split_list)
+               split_list->si_prev = NULL;
+       } else {
+           if (listp->si_next)
+               listp->si_next->si_prev = listp->si_prev;
+           listp->si_prev->si_next = listp->si_next;
+       }
+       free(listp);
+    }
+    return 1;
+}      /* xlog_print_find_tid */
+
+int
+xlog_print_trans_header(xfs_caddr_t *ptr, int len)
+{
+    xfs_trans_header_t  *h;
+    xfs_caddr_t                cptr = *ptr;
+    __uint32_t          magic;
+    char                *magic_c = (char *)&magic;
+
+    *ptr += len;
+    
+    magic=*(__uint32_t*)cptr; /* XXX INT_GET soon */
+    
+    if (len >= 4)
+       printf("%c%c%c%c:", 
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+                magic_c[3], magic_c[2], magic_c[1], magic_c[0]);
+#else
+               magic_c[0], magic_c[1], magic_c[2], magic_c[3]);
+#endif
+    if (len != sizeof(xfs_trans_header_t)) {
+       printf("   Not enough data to decode further\n");
+       return 1;
+    }
+    h = (xfs_trans_header_t *)cptr;
+    printf("    type: %s       tid: %x       num_items: %d\n",
+          trans_type[h->th_type], h->th_tid, h->th_num_items);
+    return 0;
+}      /* xlog_print_trans_header */
+
+
+int
+xlog_print_trans_buffer(xfs_caddr_t *ptr, int len, int *i, int num_ops)
+{      /* prints a buffer log format structure and the regions that follow it */
+    xfs_buf_log_format_t *f;
+    xfs_buf_log_format_v1_t *old_f;
+    xfs_agi_t           *agi;
+    xfs_agf_t           *agf;
+    xfs_disk_dquot_t    *dq;
+    xlog_op_header_t    *head = 0;
+    int                         num, skip;
+    int                         super_block = 0;
+    int                         bucket, col, buckets;
+    __int64_t           blkno;
+    xfs_buf_log_format_t lbuf;
+    int                         size, blen, map_size, struct_size;
+    long long           x, y;
+    
+    /*
+     * bcopy to ensure 8-byte alignment for the long longs in
+     * buf_log_format_t structure
+     */
+    bcopy(*ptr, &lbuf, sizeof(xfs_buf_log_format_t));
+    f = &lbuf;
+    *ptr += len;       /* step past the format structure */
+
+    if (f->blf_type == XFS_LI_BUF) {
+       blkno = f->blf_blkno;
+       size = f->blf_size;
+       blen = f->blf_len;
+       map_size = f->blf_map_size;
+       struct_size = sizeof(xfs_buf_log_format_t);
+    } else {           /* every other type is decoded with the v1 layout */
+       old_f = (xfs_buf_log_format_v1_t*)f;
+       blkno = old_f->blf_blkno;
+       size = old_f->blf_size;
+       blen = old_f->blf_len;
+       map_size = old_f->blf_map_size;
+       struct_size = sizeof(xfs_buf_log_format_v1_t);
+    }
+    switch (f->blf_type)  {
+    case XFS_LI_BUF:
+       printf("BUF:  ");
+       break;
+    case XFS_LI_6_1_BUF:
+       printf("6.1 BUF:  ");
+       break;
+    case XFS_LI_5_3_BUF:
+       printf("5.3 BUF:  ");
+       break;
+    case XFS_LI_DQUOT:
+       printf("DQUOT BUF:  ");
+       break;
+    default:
+       printf("UNKNOWN BUF:  ");
+       break;
+    }
+    if (len >= struct_size) {
+       /* bytes beyond the fixed structure must be whole ints */
+       ASSERT((len - struct_size) % sizeof(int) == 0);
+       printf("#regs: %d   start blkno: %lld (0x%llx)  len: %d  bmap size: %d\n",
+              size, blkno, blkno, blen, map_size);
+       if (blkno == 0)
+           super_block = 1;
+    } else {
+       ASSERT(len >= 4);       /* must have at least 4 bytes if != 0 */
+       printf("#regs: %d   Not printing rest of data\n", f->blf_size);
+       return size;
+    }
+    num = size-1;      /* regions left to print (size presumably counts this header too) */
+
+    /* Check if all regions in this log item were in the given LR ptr */
+    if (*i+num > num_ops-1) {
+       skip = num - (num_ops-1-*i);
+       num = num_ops-1-*i;
+    } else {
+       skip = 0;
+    }
+    while (num-- > 0) {
+       (*i)++;
+       head = (xlog_op_header_t *)*ptr;
+       xlog_print_op_header(head, *i, ptr);
+       if (super_block) {
+               printf("SUPER BLOCK Buffer: ");
+               if (INT_GET(head->oh_len, ARCH_CONVERT) < 4*8) {
+                       printf("Out of space\n");
+               } else {
+                       printf("\n");
+                       /*
+                        * bcopy because *ptr may not be 8-byte aligned
+                        */
+                       bcopy(*ptr, &x, sizeof(long long));
+                       bcopy(*ptr+8, &y, sizeof(long long));
+                       printf("icount: %lld  ifree: %lld  ", 
+                                INT_GET(x, ARCH_CONVERT), 
+                                INT_GET(y, ARCH_CONVERT));
+                       bcopy(*ptr+16, &x, sizeof(long long));
+                       bcopy(*ptr+24, &y, sizeof(long long));
+                       printf("fdblks: %lld  frext: %lld\n", 
+                                INT_GET(x, ARCH_CONVERT), 
+                                INT_GET(y, ARCH_CONVERT));
+               }
+               super_block = 0;
+       } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGI_MAGIC) {
+               agi = (xfs_agi_t *)(*ptr);
+               printf("AGI Buffer: XAGI  ");
+               if (INT_GET(head->oh_len, ARCH_CONVERT) <
+                   sizeof(xfs_agi_t) -
+                   XFS_AGI_UNLINKED_BUCKETS*sizeof(xfs_agino_t)) {
+                       printf("out of space\n");
+               } else {
+                       printf("\n");
+                       printf("ver: %d  ",
+                               INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+                       printf("seq#: %d  len: %d  cnt: %d  root: %d\n",
+                               INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                               INT_GET(agi->agi_length, ARCH_CONVERT),
+                               INT_GET(agi->agi_count, ARCH_CONVERT),
+                               INT_GET(agi->agi_root, ARCH_CONVERT));
+                       printf("level: %d  free#: 0x%x  newino: 0x%x\n",
+                               INT_GET(agi->agi_level, ARCH_CONVERT),
+                               INT_GET(agi->agi_freecount, ARCH_CONVERT),
+                               INT_GET(agi->agi_newino, ARCH_CONVERT));
+                       if (INT_GET(head->oh_len, ARCH_CONVERT) == 128) {
+                               buckets = 17;
+                       } else if (INT_GET(head->oh_len, ARCH_CONVERT) == 256) {
+                               buckets = 32 + 17;
+                       } else {
+                               buckets = XFS_AGI_UNLINKED_BUCKETS;
+                       }
+                       for (bucket = 0; bucket < buckets;) {
+                               printf("bucket[%d - %d]: ", bucket, bucket+3);
+                               for (col = 0; col < 4; col++, bucket++) {
+                                       if (bucket < buckets) {
+                                               printf("0x%x ",
+                       INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT));
+                                       }
+                               }
+                               printf("\n");
+                       }
+               }
+       } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGF_MAGIC) {
+               agf = (xfs_agf_t *)(*ptr);
+               printf("AGF Buffer: XAGF  ");
+               if (INT_GET(head->oh_len, ARCH_CONVERT) < sizeof(xfs_agf_t)) {
+                       printf("Out of space\n");
+               } else {
+                       printf("\n");
+                       printf("ver: %d  seq#: %d  len: %d  \n",
+                               INT_GET(agf->agf_versionnum, ARCH_CONVERT),
+                               INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               INT_GET(agf->agf_length, ARCH_CONVERT));
+                       printf("root BNO: %d  CNT: %d\n",
+                               INT_GET(agf->agf_roots[XFS_BTNUM_BNOi],
+                                       ARCH_CONVERT),
+                               INT_GET(agf->agf_roots[XFS_BTNUM_CNTi],
+                                       ARCH_CONVERT));
+                       printf("level BNO: %d  CNT: %d\n",
+                               INT_GET(agf->agf_levels[XFS_BTNUM_BNOi],
+                                       ARCH_CONVERT),
+                               INT_GET(agf->agf_levels[XFS_BTNUM_CNTi],
+                                       ARCH_CONVERT));
+                       printf("1st: %d  last: %d  cnt: %d  "
+                              "freeblks: %d  longest: %d\n",
+                               INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+                               INT_GET(agf->agf_fllast, ARCH_CONVERT),
+                               INT_GET(agf->agf_flcount, ARCH_CONVERT),
+                               INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+                               INT_GET(agf->agf_longest, ARCH_CONVERT));
+               }
+       } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_DQUOT_MAGIC) {
+               dq = (xfs_disk_dquot_t *)(*ptr);
+               printf("DQUOT Buffer: DQ  ");
+               if (INT_GET(head->oh_len, ARCH_CONVERT) <
+                               sizeof(xfs_disk_dquot_t)) {
+                       printf("Out of space\n");
+               }
+               else {
+                       printf("\n");
+                       printf("ver: %d  flags: 0x%x  id: %d  \n",
+                               INT_GET(dq->d_version, ARCH_CONVERT),
+                               INT_GET(dq->d_flags, ARCH_CONVERT),
+                               INT_GET(dq->d_id, ARCH_CONVERT));
+                       printf("blk limits  hard: %llu  soft: %llu\n",
+                               INT_GET(dq->d_blk_hardlimit, ARCH_CONVERT),
+                               INT_GET(dq->d_blk_softlimit, ARCH_CONVERT));
+                       printf("blk  count: %llu  warns: %d  timer: %d\n",
+                               INT_GET(dq->d_bcount, ARCH_CONVERT),
+                               INT_GET(dq->d_bwarns, ARCH_CONVERT),
+                               INT_GET(dq->d_btimer, ARCH_CONVERT));
+                       printf("ino limits  hard: %llu  soft: %llu\n",
+                               INT_GET(dq->d_ino_hardlimit, ARCH_CONVERT),
+                               INT_GET(dq->d_ino_softlimit, ARCH_CONVERT));
+                       printf("ino  count: %llu  warns: %d  timer: %d\n",
+                               INT_GET(dq->d_icount, ARCH_CONVERT),
+                               INT_GET(dq->d_iwarns, ARCH_CONVERT),
+                               INT_GET(dq->d_itimer, ARCH_CONVERT));
+               }
+       } else {
+               printf("BUF DATA\n");
+               if (print_data) {
+                       uint *dp  = (uint *)*ptr;
+                       int  nums = INT_GET(head->oh_len, ARCH_CONVERT) >> 2;
+                       int  w = 0;     /* word index; not named i so the parameter is not shadowed */
+
+                       while (w < nums) {
+                               if ((w % 8) == 0)
+                                       printf("%2x ", w);
+                               printf("%8x ", *dp);
+                               dp++;
+                               w++;
+                               if ((w % 8) == 0)
+                                       printf("\n");
+                       }
+                       printf("\n");
+               }
+       }
+       *ptr += INT_GET(head->oh_len, ARCH_CONVERT);
+    }
+    if (head && head->oh_flags & XLOG_CONTINUE_TRANS)
+       skip++;
+    return skip;       /* regions of this item that were not printed from this record */
+}      /* xlog_print_trans_buffer */
+
+
+/*
+ * Print an EFD log item (xfs_efd_log_format_t) found at *ptr.
+ *
+ * ptr - in/out cursor into the log record buffer; always advanced by len
+ * len - length in bytes of the region holding the item
+ *
+ * Returns 0 when the item was decoded, 1 when the region is too short to
+ * hold the fixed part of the structure.
+ */
+int
+xlog_print_trans_efd(xfs_caddr_t *ptr, uint len)
+{
+    xfs_efd_log_format_t lbuf;
+    xfs_extent_t         ext;
+    xfs_caddr_t          src = *ptr;
+    xfs_caddr_t          extp;
+    uint                 ext_off;
+    uint                 nextents;
+    uint                 avail;
+    uint                 i;
+
+    /* the region is consumed whether or not it can be decoded */
+    *ptr += len;
+
+    /* check the length first so a short region is never read past its end */
+    if (len < sizeof(xfs_efd_log_format_t)) {
+        printf("EFD: Not enough data to decode further\n");
+        return 1;
+    }
+
+    /*
+     * bcopy to ensure 8-byte alignment for the long longs in
+     * xfs_efd_log_format_t structure
+     */
+    bcopy(src, &lbuf, sizeof(lbuf));
+    printf("EFD:  #regs: %d    num_extents: %d  id: 0x%llx\n",
+           lbuf.efd_size, lbuf.efd_nextents,
+           (unsigned long long)lbuf.efd_efi_id);
+
+    /*
+     * lbuf only has room for the first extent, so the extents are read
+     * one at a time straight from the log buffer.  The count comes from
+     * efd_nextents (efd_size is the number of regions, printed above as
+     * "#regs") and is clamped to what fits inside this region.
+     */
+    ext_off = (uint)((xfs_caddr_t)lbuf.efd_extents - (xfs_caddr_t)&lbuf);
+    avail = (len - ext_off) / (uint)sizeof(xfs_extent_t);
+    nextents = lbuf.efd_nextents;
+    if (nextents > avail)
+        nextents = avail;
+
+    extp = src + ext_off;
+    for (i = 0; i < nextents; i++) {
+        bcopy(extp, &ext, sizeof(ext));
+        printf("(s: 0x%llx, l: %d) ",
+               (unsigned long long)ext.ext_start, ext.ext_len);
+        if (i % 4 == 3)
+            printf("\n");
+        extp += sizeof(ext);
+    }
+    /* finish a partially filled row of four extents */
+    if (i % 4 != 0)
+        printf("\n");
+    return 0;
+}      /* xlog_print_trans_efd */
+
+
+/*
+ * Print an EFI log item (xfs_efi_log_format_t) found at *ptr.
+ *
+ * ptr - in/out cursor into the log record buffer; always advanced by len
+ * len - length in bytes of the region holding the item
+ *
+ * Returns 0 when the item was decoded, 1 when the region is too short to
+ * hold the fixed part of the structure.
+ */
+int
+xlog_print_trans_efi(xfs_caddr_t *ptr, uint len)
+{
+    xfs_efi_log_format_t lbuf;
+    xfs_extent_t         ext;
+    xfs_caddr_t          src = *ptr;
+    xfs_caddr_t          extp;
+    uint                 ext_off;
+    uint                 nextents;
+    uint                 avail;
+    uint                 i;
+
+    /* the region is consumed whether or not it can be decoded */
+    *ptr += len;
+
+    /* check the length first so a short region is never read past its end */
+    if (len < sizeof(xfs_efi_log_format_t)) {
+        printf("EFI: Not enough data to decode further\n");
+        return 1;
+    }
+
+    /*
+     * bcopy to ensure 8-byte alignment for the long longs in
+     * xfs_efi_log_format_t structure
+     */
+    bcopy(src, &lbuf, sizeof(lbuf));
+    printf("EFI:  #regs: %d    num_extents: %d  id: 0x%llx\n",
+           lbuf.efi_size, lbuf.efi_nextents,
+           (unsigned long long)lbuf.efi_id);
+
+    /*
+     * lbuf only has room for the first extent, so the extents are read
+     * one at a time straight from the log buffer.  The count comes from
+     * efi_nextents (efi_size is the number of regions, printed above as
+     * "#regs") and is clamped to what fits inside this region.
+     */
+    ext_off = (uint)((xfs_caddr_t)lbuf.efi_extents - (xfs_caddr_t)&lbuf);
+    avail = (len - ext_off) / (uint)sizeof(xfs_extent_t);
+    nextents = lbuf.efi_nextents;
+    if (nextents > avail)
+        nextents = avail;
+
+    extp = src + ext_off;
+    for (i = 0; i < nextents; i++) {
+        bcopy(extp, &ext, sizeof(ext));
+        printf("(s: 0x%llx, l: %d) ",
+               (unsigned long long)ext.ext_start, ext.ext_len);
+        if (i % 4 == 3)
+            printf("\n");
+        extp += sizeof(ext);
+    }
+    /* finish a partially filled row of four extents */
+    if (i % 4 != 0)
+        printf("\n");
+    return 0;
+}      /* xlog_print_trans_efi */
+
+
+/*
+ * Dump the fields of an inode core structure to stdout, one group of
+ * related fields per output line.
+ *
+ * ip - inode core to print; fields are printed as stored, no byte
+ *      swapping is applied here
+ */
+void
+xlog_print_trans_inode_core(xfs_dinode_core_t *ip)
+{
+    printf("INODE CORE\n");
+
+    /* identity and on-disk format */
+    printf("magic 0x%hx mode 0%ho version %d format %d\n",
+           ip->di_magic,
+           ip->di_mode,
+           (int)ip->di_version,
+           (int)ip->di_format);
+
+    /* link count and ownership */
+    printf("nlink %hd uid %d gid %d\n",
+           ip->di_nlink,
+           ip->di_uid,
+           ip->di_gid);
+
+    /* timestamps (seconds part only) */
+    printf("atime 0x%x mtime 0x%x ctime 0x%x\n",
+           ip->di_atime.t_sec,
+           ip->di_mtime.t_sec,
+           ip->di_ctime.t_sec);
+
+    /* size and data fork extent information */
+    printf("size 0x%llx nblocks 0x%llx extsize 0x%x nextents 0x%x\n",
+           ip->di_size,
+           ip->di_nblocks,
+           ip->di_extsize,
+           ip->di_nextents);
+
+    /* attribute fork and DMAPI state */
+    printf("naextents 0x%x forkoff %d dmevmask 0x%x dmstate 0x%hx\n",
+           ip->di_anextents,
+           (int)ip->di_forkoff,
+           ip->di_dmevmask,
+           ip->di_dmstate);
+
+    printf("flags 0x%x gen 0x%x\n",
+           ip->di_flags,
+           ip->di_gen);
+}
+
+/*
+ * Print a shortform directory logged as local inode data.
+ *
+ * sfp  - start of the shortform directory data (currently not decoded)
+ * size - size value reported in the output line
+ *
+ * XXX need to determine whether this is v1 or v2, then print the
+ * appropriate structure.  Until that is done only the size is reported;
+ * the entry-walking code that used to sit behind an unconditional return
+ * was unreachable and has been dropped together with its locals.
+ */
+void
+xlog_print_dir_sf(xfs_dir_shortform_t *sfp, int size)
+{
+       (void)sfp;      /* not decoded yet, see XXX above */
+
+       printf("SHORTFORM DIRECTORY size %d\n",
+                size);
+}
+
+/*
+ * Print an inode log item: the inode log format region, then the inode
+ * core region and, depending on ilf_fields, one further data region.
+ *
+ * ptr     - in/out cursor into the log record buffer; advanced past every
+ *           region that is consumed here
+ * len     - length in bytes of the first (format) region
+ * i       - in/out index of the current operation; bumped for each region
+ *           consumed
+ * num_ops - number of operations in the log record
+ *
+ * Returns 0 when the whole item was printed.  A non-zero return is handed
+ * by the caller to xlog_print_add_to_trans() as a "skip" count --
+ * presumably the number of regions of this item that are still to come
+ * in a later record (TODO confirm against xlog_print_add_to_trans).
+ */
+int
+xlog_print_trans_inode(xfs_caddr_t *ptr, int len, int *i, int num_ops)
+{
+    xfs_inode_log_format_t *f;
+    xfs_inode_log_format_t_v1 *old_f;
+    xfs_dinode_core_t     dino;
+    xlog_op_header_t      *op_head;
+    int                           version;
+    xfs_inode_log_format_t lbuf = {0};
+    int                           mode;
+    int                           size;
+
+    /*
+     * print inode type header region
+     *
+     * bcopy to ensure 8-byte alignment for the long longs in
+     * xfs_inode_log_format_t structure
+     *
+     * len can be smaller than xfs_inode_log_format_t sometimes... (?)
+     */
+    bcopy(*ptr, &lbuf, MIN(sizeof(xfs_inode_log_format_t), len));
+    version = lbuf.ilf_type;
+    f = &lbuf;
+    (*i)++;                                    /* bump index */
+    *ptr += len;
+    if (version == XFS_LI_5_3_INODE) {
+       /* old format: reinterpret the same copied bytes with the v1 layout */
+       old_f = (xfs_inode_log_format_t_v1 *)f;
+       if (len == sizeof(xfs_inode_log_format_t_v1)) {
+           printf("5.3 INODE: #regs: %d   ino: 0x%llx  flags: 0x%x   dsize: %d\n",
+                  old_f->ilf_size, old_f->ilf_ino,
+                  old_f->ilf_fields, old_f->ilf_dsize);
+       } else {
+           ASSERT(len >= 4);   /* must have at least 4 bytes if != 0 */
+           printf("5.3 INODE: #regs: %d   Not printing rest of data\n",
+                  old_f->ilf_size);
+           return old_f->ilf_size;
+       }
+    } else {
+       if (len == sizeof(xfs_inode_log_format_t)) {
+           if (version == XFS_LI_6_1_INODE)
+               printf("6.1 INODE: ");
+           else printf("INODE: ");
+           printf("#regs: %d   ino: 0x%llx  flags: 0x%x   dsize: %d\n",
+                  f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize);
+           printf("        blkno: %lld  len: %d  boff: %d\n",
+                  f->ilf_blkno, f->ilf_len, f->ilf_boffset);
+       } else {
+           ASSERT(len >= 4);   /* must have at least 4 bytes if != 0 */
+           printf("INODE: #regs: %d   Not printing rest of data\n",
+                  f->ilf_size);
+           return f->ilf_size;
+       }
+    }
+
+    /* the format region was the last op in this record: the rest is pending */
+    if (*i >= num_ops)                 /* end of LR */
+           return f->ilf_size-1;
+
+    /* core inode comes 2nd */
+    op_head = (xlog_op_header_t *)*ptr;
+    xlog_print_op_header(op_head, *i, ptr);
+    
+    if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+       return f->ilf_size-1;
+    }
+    
+    /* copy the core out of the log buffer before reading its fields */
+    bcopy(*ptr, &dino, sizeof(dino));
+    mode = dino.di_mode & IFMT;
+    size = (int)dino.di_size;
+    xlog_print_trans_inode_core(&dino);
+    *ptr += sizeof(xfs_dinode_core_t);
+
+    /* three regions expected but the core was the record's last op */
+    if (*i == num_ops-1 && f->ilf_size == 3)  {
+           return 1;
+    }
+
+    /* does anything come next */
+    op_head = (xlog_op_header_t *)*ptr;
+    switch (f->ilf_fields & XFS_ILOG_NONCORE) {
+       case XFS_ILOG_DEXT: {
+           ASSERT(f->ilf_size == 3);
+           (*i)++;
+           xlog_print_op_header(op_head, *i, ptr);
+           printf("EXTENTS inode data\n");
+           *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+           if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+               return 1;
+           }
+           break;
+       }
+       case XFS_ILOG_DBROOT: {
+           ASSERT(f->ilf_size == 3);
+           (*i)++;
+           xlog_print_op_header(op_head, *i, ptr);
+           printf("BTREE inode data\n");
+           *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+           if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))  {
+               return 1;
+           }
+           break;
+       }
+       case XFS_ILOG_DDATA: {
+           ASSERT(f->ilf_size == 3);
+           (*i)++;
+           xlog_print_op_header(op_head, *i, ptr);
+           printf("LOCAL inode data\n");
+           /* local data of a directory inode is a shortform directory */
+           if (mode == IFDIR) {
+               xlog_print_dir_sf((xfs_dir_shortform_t*)*ptr, size);
+           }
+           *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+           if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS))
+               return 1;
+           break;
+       }
+       case XFS_ILOG_DEV: {
+           ASSERT(f->ilf_size == 2);
+           printf("DEV inode: no extra region\n");
+           break;
+       }
+       case XFS_ILOG_UUID: {
+           ASSERT(f->ilf_size == 2);
+           printf("UUID inode: no extra region\n");
+           break;
+       }
+       case 0: {
+           ASSERT(f->ilf_size == 2);
+           break;
+       }
+       default: {
+           /* NOTE(review): no break/return here; if xlog_panic() returns, control falls out to "return 0" */
+           xlog_panic("xlog_print_trans_inode: illegal inode type");
+       }
+    }
+    return 0;
+}      /* xlog_print_trans_inode */
+
+
+
+/******************************************************************************
+ *
+ *             Log print routines
+ *
+ ******************************************************************************
+ */
+
+/*
+ * Seek fd to a position given in basic blocks; exits the program with an
+ * error message if the seek fails.
+ *
+ * log    - log descriptor; only dereferenced (l_logBBstart) for SEEK_SET
+ * fd     - descriptor to seek on
+ * blkno  - basic-block number: relative to the start of the log for
+ *          SEEK_SET, otherwise a (possibly negative) block delta
+ * whence - lseek64() whence value
+ */
+void
+xlog_print_lseek(xlog_t *log, int fd, xfs_daddr_t blkno, int whence)
+{
+       xfs_off_t offset;
+
+       if (whence == SEEK_SET)
+               blkno += log->l_logBBstart;
+
+       /*
+        * Convert basic blocks to bytes by multiplying rather than
+        * shifting: relative seeks pass negative block counts, and a left
+        * shift of a negative value is undefined behaviour.
+        */
+       offset = (xfs_off_t)blkno * ((xfs_off_t)1 << BBSHIFT);
+
+       if (lseek64(fd, offset, whence) < 0) {
+               /* offset is signed, so report it as such */
+               fprintf(stderr, "%s: lseek64 to %lld failed: %s\n",
+                       progname, (long long)offset, strerror(errno));
+               exit(1);
+       }
+}      /* xlog_print_lseek */
+
+
+/*
+ * Print a labelled log sequence number as "<string>: <cycle>,<block>".
+ * No trailing newline is written.
+ *
+ * string - label printed in front of the LSN
+ * lsn    - log sequence number to print
+ * arch   - architecture argument handed to CYCLE_LSN()/BLOCK_LSN()
+ */
+void
+print_lsn(xfs_caddr_t string, xfs_lsn_t *lsn, xfs_arch_t arch)
+{
+    printf("%s: %u,%u",
+           string,
+           CYCLE_LSN(*lsn, arch),
+           BLOCK_LSN(*lsn, arch));
+}
+
+
+/*
+ * Read the data blocks of one log record from fd and print every
+ * operation found in them.
+ *
+ * fd          - descriptor positioned just after the record header
+ * num_ops     - number of operations in the record
+ * len         - length in bytes of the record data
+ * read_type   - in/out: FULL_READ, or the number of bytes already held in
+ *               *partial_buf from an earlier partial read
+ * partial_buf - in/out: buffer carried over between a partial read and
+ *               the call that completes it
+ * rhead       - record header; supplies the expected cycle number and the
+ *               saved first words of each block (h_cycle_data)
+ *
+ * Returns NO_ERROR on success (also when printing is disabled or len is
+ * zero), PARTIAL_READ when the read stopped at the physical end of the
+ * log (the buffer is kept in *partial_buf), or -1 when a block does not
+ * look like data of this record.  The buffer is freed on NO_ERROR and on
+ * every -1 return.
+ */
+int
+xlog_print_record(int            fd,
+                int              num_ops,
+                int              len,
+                int              *read_type,
+                xfs_caddr_t      *partial_buf,
+                xlog_rec_header_t *rhead)
+{
+    xlog_op_header_t   *op_head;
+    xlog_rec_header_t  *rechead;
+    xfs_caddr_t        buf, ptr;
+    int                read_len, skip;
+    int                ret, n, i;
+
+    if (print_no_print)
+        return NO_ERROR;
+
+    if (!len) {
+        printf("\n");
+        return NO_ERROR;
+    }
+
+    /* read_len must read up to some block boundary */
+    read_len = (int) BBTOB(BTOBB(len));
+
+    /* read_type => don't malloc() new buffer, use old one */
+    if (*read_type == FULL_READ) {
+        if ((ptr = buf = (xfs_caddr_t)malloc(read_len)) == NULL) {
+            fprintf(stderr, "xlog_print_record: malloc failed\n");
+            exit(1);
+        }
+    } else {
+        /* append after the *read_type bytes already in the old buffer */
+        read_len -= *read_type;
+        buf = (xfs_caddr_t)((__psint_t)(*partial_buf) + (__psint_t)(*read_type));
+        ptr = *partial_buf;
+    }
+    if ((ret = (int) read(fd, buf, read_len)) == -1) {
+        fprintf(stderr, "xlog_print_record: read error\n");
+        exit(1);
+    }
+    /* Did we overflow the end? */
+    if (*read_type == FULL_READ &&
+        BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)+BTOBB(read_len) >= logBBsize) {
+        *read_type = BBTOB(logBBsize-BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)-1);
+        *partial_buf = buf;
+        return PARTIAL_READ;
+    }
+
+    /* Did we read everything? */
+    if ((ret == 0 && read_len != 0) || ret != read_len) {
+        *read_type = ret;
+        *partial_buf = buf;
+        return PARTIAL_READ;
+    }
+    if (*read_type != FULL_READ)
+        read_len += *read_type;
+
+    /* Everything read in.  Start from beginning of buffer */
+    buf = ptr;
+    for (i = 0; ptr < buf + read_len; ptr += BBSIZE, i++) {
+        rechead = (xlog_rec_header_t *)ptr;
+        if (INT_GET(rechead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) {
+            /*
+             * A record header where data was expected: seek back to
+             * that block.  xlog_print_lseek() takes basic blocks, so
+             * the byte delta (-read_len + i*BBSIZE) is converted;
+             * read_len is a whole number of basic blocks here.
+             */
+            xlog_print_lseek(0, fd, i - read_len / BBSIZE, SEEK_CUR);
+            free(buf);
+            return -1;
+        }
+        if (INT_GET(rhead->h_cycle, ARCH_CONVERT) !=
+                    INT_GET(*(uint *)ptr, ARCH_CONVERT)) {
+            /*
+             * Wrong cycle number in the first word of the block.  A
+             * record completed after a partial read may also carry the
+             * next cycle number; anything else is bad data.  Free the
+             * buffer here as the header-magic path above does.
+             */
+            if (*read_type == FULL_READ ||
+                INT_GET(rhead->h_cycle, ARCH_CONVERT) + 1 !=
+                    INT_GET(*(uint *)ptr, ARCH_CONVERT)) {
+                free(buf);
+                return -1;
+            }
+        }
+        /* put back the data word that the cycle number replaced */
+        INT_SET(*(uint *)ptr, ARCH_CONVERT,
+                INT_GET(rhead->h_cycle_data[i], ARCH_CONVERT));
+    }
+    ptr = buf;
+    for (i=0; i<num_ops; i++) {
+        print_xlog_op_line();
+        op_head = (xlog_op_header_t *)ptr;
+        xlog_print_op_header(op_head, i, &ptr);
+
+        /* print transaction data */
+        if (print_no_data ||
+            ((XLOG_SET(op_head->oh_flags, XLOG_WAS_CONT_TRANS) ||
+              XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) &&
+             INT_GET(op_head->oh_len, ARCH_CONVERT) == 0)) {
+            for (n = 0; n < INT_GET(op_head->oh_len, ARCH_CONVERT); n++) {
+                printf("%c", *ptr);
+                ptr++;
+            }
+            printf("\n");
+            continue;
+        }
+        if (xlog_print_find_tid(INT_GET(op_head->oh_tid, ARCH_CONVERT),
+                                op_head->oh_flags & XLOG_WAS_CONT_TRANS)) {
+            printf("Left over region from split log item\n");
+            ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+            continue;
+        }
+        if (INT_GET(op_head->oh_len, ARCH_CONVERT) != 0) {
+            if (*(uint *)ptr == XFS_TRANS_HEADER_MAGIC) {
+                skip = xlog_print_trans_header(&ptr,
+                                        INT_GET(op_head->oh_len, ARCH_CONVERT));
+            } else {
+                /* the first 16 bits of the region give the log item type */
+                switch (*(unsigned short *)ptr) {
+                    case XFS_LI_5_3_BUF:
+                    case XFS_LI_6_1_BUF:
+                    case XFS_LI_DQUOT:
+                    case XFS_LI_BUF: {
+                        skip = xlog_print_trans_buffer(&ptr,
+                                        INT_GET(op_head->oh_len, ARCH_CONVERT),
+                                        &i, num_ops);
+                        break;
+                    }
+                    case XFS_LI_5_3_INODE:
+                    case XFS_LI_6_1_INODE:
+                    case XFS_LI_INODE: {
+                        skip = xlog_print_trans_inode(&ptr,
+                                        INT_GET(op_head->oh_len, ARCH_CONVERT),
+                                        &i, num_ops);
+                        break;
+                    }
+                    case XFS_LI_EFI: {
+                        skip = xlog_print_trans_efi(&ptr,
+                                        INT_GET(op_head->oh_len, ARCH_CONVERT));
+                        break;
+                    }
+                    case XFS_LI_EFD: {
+                        skip = xlog_print_trans_efd(&ptr,
+                                        INT_GET(op_head->oh_len, ARCH_CONVERT));
+                        break;
+                    }
+                    case XLOG_UNMOUNT_TYPE: {
+                        printf("Unmount filesystem\n");
+                        skip = 0;
+                        break;
+                    }
+                    default: {
+                        fprintf(stderr, "%s: unknown log operation type (%x)\n",
+                                 progname, *(unsigned short *)ptr);
+                        skip = 0;
+                        ptr += INT_GET(op_head->oh_len, ARCH_CONVERT);
+                    }
+                } /* switch */
+            } /* else */
+            /* remember how many regions of a split item are still due */
+            if (skip != 0)
+                xlog_print_add_to_trans(INT_GET(op_head->oh_tid, ARCH_CONVERT), skip);
+        }
+    }
+    printf("\n");
+    free(buf);
+    return NO_ERROR;
+}      /* xlog_print_record */
+
+
+/*
+ * Validate and print a log record header.
+ *
+ * head - record header as read from the log
+ * len  - out: record data length in bytes (h_len); only written when the
+ *        header is valid and printing is enabled
+ *
+ * Returns the number of log operations in the record, or ZEROED_LOG when
+ * the magic number field is zero, or BAD_HEADER when it is neither zero
+ * nor XLOG_HEADER_MAGIC_NUM.  With print_no_print set the operation count
+ * is returned straight away, with no validation and *len left untouched.
+ */
+int
+xlog_print_rec_head(xlog_rec_header_t *head, int *len)
+{
+    int i;
+    char uub[64];
+    int datalen, bbs, max_bbs;
+
+    if (print_no_print)
+        return INT_GET(head->h_num_logops, ARCH_CONVERT);
+
+    if (INT_ISZERO(head->h_magicno, ARCH_CONVERT))
+        return ZEROED_LOG;
+
+    if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) {
+        printf("Header 0x%x wanted 0x%x\n",
+                INT_GET(head->h_magicno, ARCH_CONVERT),
+                XLOG_HEADER_MAGIC_NUM);
+        return BAD_HEADER;
+    }
+
+    datalen = INT_GET(head->h_len, ARCH_CONVERT);
+
+    /*
+     * Number of basic blocks covering datalen, rounded up.  The ?: needs
+     * its own parentheses: '+' binds tighter, so without them the whole
+     * sum becomes the condition and bbs is only ever 0 or 1.
+     */
+    bbs = (datalen / BBSIZE) + ((datalen % BBSIZE) ? 1 : 0);
+
+    /*
+     * Never index past the saved cycle words, whatever h_len claims.
+     * Assumes h_cycle_data is an array member of the header -- TODO
+     * confirm against the xlog_rec_header_t declaration.
+     */
+    max_bbs = (int)(sizeof(head->h_cycle_data) / sizeof(head->h_cycle_data[0]));
+    if (bbs > max_bbs)
+        bbs = max_bbs;
+
+    printf("cycle: %d  version: %d     ",
+            INT_GET(head->h_cycle, ARCH_CONVERT),
+            INT_GET(head->h_version, ARCH_CONVERT));
+    print_lsn("        lsn", &head->h_lsn, ARCH_CONVERT);
+    print_lsn("        tail_lsn", &head->h_tail_lsn, ARCH_CONVERT);
+    printf("\n");
+    printf("length of Log Record: %d   prev offset: %d         num ops: %d\n",
+            datalen,
+            INT_GET(head->h_prev_block, ARCH_CONVERT),
+            INT_GET(head->h_num_logops, ARCH_CONVERT));
+
+    if (print_overwrite) {
+        /* one saved data word per basic block of the record */
+        printf("cycle num overwrites: ");
+        for (i = 0; i < bbs; i++)
+            printf("%d - 0x%x  ",
+                    i,
+                    INT_GET(head->h_cycle_data[i], ARCH_CONVERT));
+        printf("\n");
+    }
+
+    uuid_unparse(head->h_fs_uuid, uub);
+    printf("uuid: %s   format: ", uub);
+    switch (INT_GET(head->h_fmt, ARCH_CONVERT)) {
+        case XLOG_FMT_UNKNOWN:
+            printf("unknown\n");
+            break;
+        case XLOG_FMT_LINUX_LE:
+            printf("little endian linux\n");
+            break;
+        case XLOG_FMT_LINUX_BE:
+            printf("big endian linux\n");
+            break;
+        case XLOG_FMT_IRIX_BE:
+            printf("big endian irix\n");
+            break;
+        default:
+            printf("? (%d)\n", INT_GET(head->h_fmt, ARCH_CONVERT));
+            break;
+    }
+
+    *len = datalen;
+    return(INT_GET(head->h_num_logops, ARCH_CONVERT));
+}      /* xlog_print_rec_head */
+
+/*
+ * Report that non-zero data was found after a run of zeroed blocks.
+ * Calls xlog_exit() afterwards when print_exit is set.
+ *
+ * blkno - block number shown in the error banner
+ */
+static void
+print_xlog_bad_zeroed(xfs_daddr_t blkno)
+{
+    __int64_t bad_block = (__int64_t)blkno;
+
+    print_stars();
+    printf("* ERROR: found data after zeroed blocks block=%-21lld  *\n",
+           bad_block);
+    print_stars();
+
+    if (!print_exit)
+        return;
+    xlog_exit("Bad log - data after zeroed blocks");
+}      /* print_xlog_bad_zeroed */
+
+/*
+ * Report a bad log record header, showing the cycle value read from the
+ * block via GET_CYCLE() and the block number.  Calls xlog_exit()
+ * afterwards when print_exit is set.
+ *
+ * blkno - block number shown in the error banner
+ * buf   - the block contents, handed to GET_CYCLE()
+ */
+static void
+print_xlog_bad_header(xfs_daddr_t blkno, xfs_caddr_t buf)
+{
+    __int64_t bad_block = (__int64_t)blkno;
+
+    print_stars();
+    printf("* ERROR: header cycle=%-11d block=%-21lld        *\n",
+           GET_CYCLE(buf, ARCH_CONVERT),
+           bad_block);
+    print_stars();
+
+    if (!print_exit)
+        return;
+    xlog_exit("Bad log record header");
+}      /* print_xlog_bad_header */
+
+/*
+ * Report bad data in the log at the given block.  Calls xlog_exit()
+ * afterwards when print_exit is set.
+ *
+ * blkno - block number shown in the error banner
+ */
+void
+print_xlog_bad_data(xfs_daddr_t blkno)
+{
+    __int64_t bad_block = (__int64_t)blkno;
+
+    print_stars();
+    printf("* ERROR: data block=%-21lld                             *\n",
+           bad_block);
+    print_stars();
+
+    if (!print_exit)
+        return;
+    xlog_exit("Bad data in log");
+}      /* print_xlog_bad_data */
+
+
+/*
+ * Print the log, record by record.
+ *
+ * log               - log descriptor (supplies l_logBBsize and is passed
+ *                     to the seek and find-oldest helpers)
+ * fd                - descriptor the log is read from
+ * print_block_start - block to start printing at, or -1 to start at the
+ *                     block reported by xlog_print_find_oldest()
+ *
+ * The first loop prints from block_start up to the physical end of the
+ * log; if block_start is not 0, the second loop then prints from block 0
+ * up to block_end.  A record that straddles the physical end is finished
+ * by jumping from the PARTIAL_READ case into the second loop at
+ * partial_log_read.
+ *
+ * This code is gross and needs to be rewritten.
+ */
+void xfs_log_print(xlog_t       *log,
+                   int          fd,
+                  int          print_block_start)
+{
+    char       hbuf[XLOG_HEADER_SIZE];
+    int                num_ops, len;
+    xfs_daddr_t        block_end = 0, block_start, blkno, error;
+    int                read_type = FULL_READ;
+    xfs_caddr_t        partial_buf;
+    int         zeroed = 0;
+
+    logBBsize = log->l_logBBsize;
+              
+    /*
+     * Normally, block_start and block_end are the same value since we
+     * are printing the entire log.  However, if the start block is given,
+     * we still end at the end of the logical log.
+     */
+    if (error = xlog_print_find_oldest(log, &block_end)) {
+           fprintf(stderr, "%s: problem finding oldest LR\n", progname);
+           return;
+    }
+    if (print_block_start == -1)
+           block_start = block_end;
+    else
+           block_start = print_block_start;
+    xlog_print_lseek(log, fd, block_start, SEEK_SET);
+    blkno    = block_start;
+    
+    /* first pass: from block_start to the physical end of the log */
+    for (;;) {
+       /* NOTE(review): only a 0 return is handled; a -1 read error is treated as data */
+       if (read(fd, hbuf, 512) == 0) {
+           printf("%s: physical end of log\n", progname);
+           print_xlog_record_line();
+           break;
+        }
+       if (print_only_data) {
+               printf("BLKNO: %lld\n", (__int64_t)blkno);
+               xlog_recover_print_data(hbuf, 512);
+               blkno++;
+               goto loop;
+       }
+       /* NOTE(review): with print_no_print set, xlog_print_rec_head() returns before writing len */
+       num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len);
+       blkno++;
+        
+        if (zeroed && num_ops != ZEROED_LOG) {
+            printf("%s: after %d zeroed blocks\n", progname, zeroed);
+            /* once we find zeroed blocks - that's all we expect */
+            print_xlog_bad_zeroed(blkno-1);
+            /* reset count since we're assuming previous zeroed blocks
+             * were bad
+             */
+            zeroed = 0;
+        }
+        
+        if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) {
+            if (num_ops == ZEROED_LOG) {
+                zeroed++; 
+            } else {
+               print_xlog_bad_header(blkno-1, hbuf);
+            }
+            
+           goto loop;
+       }
+        
+       error = xlog_print_record(fd, num_ops, len, &read_type, &partial_buf,
+                                 (xlog_rec_header_t *)hbuf);
+       switch (error) {
+           case 0: {
+               blkno += BTOBB(len);
+               if (print_block_start != -1 &&
+                   blkno >= block_end)         /* If start specified, we */
+                       goto end;               /* end early */
+               break;
+           }
+           case -1: {
+               print_xlog_bad_data(blkno-1);
+               if (print_block_start != -1 &&
+                   blkno >= block_end)         /* If start specified, */
+                       goto end;               /* we end early */
+               /* resume scanning for headers at the block after the bad header */
+               xlog_print_lseek(log, fd, blkno, SEEK_SET);
+               goto loop;
+           }
+           case PARTIAL_READ: {
+                print_xlog_record_line();
+               printf("%s: physical end of log\n", progname);
+                print_xlog_record_line();
+               blkno = 0;
+               xlog_print_lseek(log, fd, 0, SEEK_SET);
+               /*
+                * We may have hit the end of the log when we started at 0.
+                * In this case, just end.
+                */
+               if (block_start == 0)
+                       goto end;
+               /* jumps into the middle of the second loop below */
+               goto partial_log_read;
+           }
+           default: xlog_panic("illegal value");
+       }
+       print_xlog_record_line();
+loop:
+       if (blkno >= logBBsize) {
+                if (zeroed) {
+                    printf("%s: skipped %d zeroed blocks\n", progname, zeroed);
+                    if (zeroed == logBBsize)
+                        printf("%s: totally zeroed log\n", progname);
+                    
+                    zeroed=0;
+                }
+               printf("%s: physical end of log\n", progname);
+               print_xlog_record_line();
+               break;
+       }
+    }
+
+    /* Do we need to print the first part of physical log? */
+    if (block_start != 0) {
+       blkno = 0;
+       xlog_print_lseek(log, fd, 0, SEEK_SET);
+       /* second pass: from block 0 up to block_end */
+       for (;;) {
+           if (read(fd, hbuf, 512) == 0) {
+               xlog_panic("xlog_find_head: bad read");
+           }
+           if (print_only_data) {
+               printf("BLKNO: %lld\n", (__int64_t)blkno);
+               xlog_recover_print_data(hbuf, 512);
+               blkno++;
+               goto loop2;
+           }
+           num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len);
+           blkno++;
+        
+           if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) {
+                /* we only expect zeroed log entries at the end
+                 * of the _physical_ log, so treat them the same
+                 * as bad blocks here
+                 */
+               print_xlog_bad_header(blkno-1, hbuf);
+                
+               if (blkno >= block_end)
+                   break;
+               continue;
+           }
+partial_log_read:
+           /* also entered from the first loop, with read_type/partial_buf describing the part already read */
+           error= xlog_print_record(fd, num_ops, len, &read_type,
+                                   &partial_buf, (xlog_rec_header_t *)hbuf);
+           /* only the part read after wrapping counts towards blkno */
+           if (read_type != FULL_READ)
+               len -= read_type;
+           read_type = FULL_READ;
+           if (!error)
+               blkno += BTOBB(len);
+           else {
+               print_xlog_bad_data(blkno-1);
+               xlog_print_lseek(log, fd, blkno, SEEK_SET);
+               goto loop2;
+           }
+           print_xlog_record_line();
+loop2:
+           if (blkno >= block_end)
+               break;
+        }
+    }
+    
+end:
+    printf("%s: logical end of log\n", progname);
+    print_xlog_record_line();
+}
diff --git a/logprint/log_print_all.c b/logprint/log_print_all.c
new file mode 100644 (file)
index 0000000..a1a81cc
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+
+/*
+ * Find the block holding the oldest valid log record (the "start" of the
+ * log) and hand it back through *last_blk.  Used by the log print code.
+ * Don't put in cmd/xfs/logprint/xfs_log_print.c since most of the bread
+ * routines live in kern/fs/xfs/xfs_log_recover only.
+ *
+ * Returns 0 on success, or the error reported by xlog_find_cycle_start().
+ * When xlog_find_zeroed() returns non-zero the function returns 0
+ * immediately and *last_blk is left untouched.
+ */
+int
+xlog_print_find_oldest(
+       struct log  *log,
+       xfs_daddr_t *last_blk)
+{
+       xfs_buf_t       *bp;
+       xfs_daddr_t     first_blk;
+       uint    first_half_cycle, last_half_cycle;
+       int     error = 0;
+
+       if (xlog_find_zeroed(log, &first_blk))
+               return 0;
+
+       first_blk = 0;          /* read first block */
+       bp = xlog_get_bp(1, log->l_mp);
+       xlog_bread(log, 0, 1, bp);
+       first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+       *last_blk = log->l_logBBsize-1; /* read last block */
+       xlog_bread(log, *last_blk, 1, bp);
+       last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT);
+       ASSERT(last_half_cycle != 0);
+
+       if (first_half_cycle == last_half_cycle) { /* all cycle nos are same */
+               *last_blk = 0;
+       } else {                /* have 1st and last; look for middle cycle */
+               error = xlog_find_cycle_start(log, bp, first_blk,
+                                             last_blk, last_half_cycle);
+       }
+
+       /*
+        * Single exit: the buffer is released on the error path as well,
+        * which previously returned early and leaked bp.
+        */
+       xlog_put_bp(bp);
+       return error;
+} /* xlog_print_find_oldest */
+
+
+/*
+ * Hex-dump len bytes at p as 32-bit words, eight words per output line,
+ * each line prefixed with the index of its first word.  Nothing is printed
+ * unless the global print_data flag is set.
+ */
+void
+xlog_recover_print_data(
+       xfs_caddr_t     p, 
+       int             len)
+{
+       uint    *words = (uint *)p;
+       int     nwords = len >> 2;      /* whole 32-bit words only */
+       int     idx;
+
+       if (!print_data)
+               return;
+
+       for (idx = 0; idx < nwords; idx++) {
+               /* start of a row: emit the word index */
+               if ((idx % 8) == 0)
+                       printf("%2x ", idx);
+               printf("%8x ", words[idx]);
+               /* end of a row: eighth word just printed */
+               if (((idx + 1) % 8) == 0)
+                       printf("\n");
+       }
+       printf("\n");
+} /* xlog_recover_print_data */
+
+
+/*
+ * Print a logged buffer item.
+ *
+ * Region 0 of the item holds the buffer log format header (current or old
+ * v1 layout, selected by blf_type); the remaining blf_size-1 regions hold
+ * logged buffer data.  Each data region is classified by the buffer's start
+ * block (0 == superblock) or by the magic number at the start of the region
+ * (AGI, AGF, dquot), and a short summary is printed.  The detailed fields
+ * are only printed when the global print_buffer flag is set.
+ */
+STATIC void
+xlog_recover_print_buffer(
+       xlog_recover_item_t *item)
+{
+       xfs_agi_t               *agi;
+       xfs_agf_t               *agf;
+       xfs_buf_log_format_v1_t *old_f;
+       xfs_buf_log_format_t    *f;
+       xfs_caddr_t             p;
+       int                     len, num, i;
+       xfs_daddr_t             blkno;
+       xfs_disk_dquot_t        *ddq;
+
+       f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
+       old_f = (xfs_buf_log_format_v1_t *)f;
+       len = item->ri_buf[0].i_len;
+       printf("        ");
+       switch (f->blf_type)  {
+           case XFS_LI_BUF: {
+               printf("BUF:  ");
+               break;
+           }
+           case XFS_LI_6_1_BUF: {
+               printf("6.1 BUF:  ");
+               break;
+           }
+           case XFS_LI_5_3_BUF: {
+               printf("5.3 BUF:  ");
+               break;
+           }
+       } 
+       /* the current format carries a 64-bit blkno, the old one 32-bit */
+       if (f->blf_type == XFS_LI_BUF) {
+               printf("#regs:%d   start blkno:0x%Lx   len:%d   bmap size:%d\n",
+                      f->blf_size, f->blf_blkno, f->blf_len, f->blf_map_size);
+               blkno = (xfs_daddr_t)f->blf_blkno;
+       } else {
+               printf("#regs:%d   start blkno:0x%x   len:%d   bmap size:%d\n",
+                      old_f->blf_size, old_f->blf_blkno, old_f->blf_len,
+                      old_f->blf_map_size);
+               blkno = (xfs_daddr_t)old_f->blf_blkno;
+       }
+       /* region 0 was the header; walk the remaining data regions */
+       num = f->blf_size-1;
+       i = 1;
+       while (num-- > 0) {
+               p = item->ri_buf[i].i_addr;
+               len = item->ri_buf[i].i_len;
+               i++;
+               if (blkno == 0) { /* super block */
+                       printf("        SUPER Block Buffer:\n");
+                       if (!print_buffer) continue;
+                       printf("                icount:%Ld  ifree:%Ld  ",
+                              INT_GET(*(long long *)(p), ARCH_CONVERT), 
+                               INT_GET(*(long long *)(p+8), ARCH_CONVERT));
+                       printf("fdblks:%Ld  frext:%Ld\n",
+                              INT_GET(*(long long *)(p+16), ARCH_CONVERT),
+                              INT_GET(*(long long *)(p+24), ARCH_CONVERT));
+                       printf("                sunit:%u  swidth:%u\n", 
+                              INT_GET(*(uint *)(p+56), ARCH_CONVERT),
+                              INT_GET(*(uint *)(p+60), ARCH_CONVERT));
+               } else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGI_MAGIC) {
+                       agi = (xfs_agi_t *)p;
+                       printf("        AGI Buffer: (XAGI)\n");
+                       if (!print_buffer) continue;
+                       printf("                ver:%d  ",
+                               INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+                       printf("seq#:%d  len:%d  cnt:%d  root:%d\n",
+                               INT_GET(agi->agi_seqno, ARCH_CONVERT),
+                               INT_GET(agi->agi_length, ARCH_CONVERT),
+                               INT_GET(agi->agi_count, ARCH_CONVERT),
+                               INT_GET(agi->agi_root, ARCH_CONVERT));
+                       printf("                level:%d  free#:0x%x  newino:0x%x\n",
+                               INT_GET(agi->agi_level, ARCH_CONVERT),
+                               INT_GET(agi->agi_freecount, ARCH_CONVERT),
+                               INT_GET(agi->agi_newino, ARCH_CONVERT));
+               } else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGF_MAGIC) {
+                       agf = (xfs_agf_t *)p;
+                       printf("        AGF Buffer: (XAGF)\n");
+                       if (!print_buffer) continue;
+                       printf("                ver:%d  seq#:%d  len:%d  \n",
+                               INT_GET(agf->agf_versionnum, ARCH_CONVERT),
+                               INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               INT_GET(agf->agf_length, ARCH_CONVERT));
+                       printf("                root BNO:%d  CNT:%d\n",
+                               INT_GET(agf->agf_roots[XFS_BTNUM_BNOi],
+                                       ARCH_CONVERT),
+                               INT_GET(agf->agf_roots[XFS_BTNUM_CNTi],
+                                       ARCH_CONVERT));
+                       printf("                level BNO:%d  CNT:%d\n",
+                               INT_GET(agf->agf_levels[XFS_BTNUM_BNOi],
+                                       ARCH_CONVERT),
+                               INT_GET(agf->agf_levels[XFS_BTNUM_CNTi],
+                                       ARCH_CONVERT));
+                       printf("                1st:%d  last:%d  cnt:%d  "
+                               "freeblks:%d  longest:%d\n",
+                               INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+                               INT_GET(agf->agf_fllast, ARCH_CONVERT),
+                               INT_GET(agf->agf_flcount, ARCH_CONVERT),
+                               INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+                               INT_GET(agf->agf_longest, ARCH_CONVERT));
+               } else if (INT_GET(((xfs_disk_dquot_t *)p)->d_magic,
+                                  ARCH_CONVERT) == XFS_DQUOT_MAGIC) {
+                       /*
+                        * Test the dquot's own d_magic field through INT_GET,
+                        * like the AGI/AGF checks above.  The old test
+                        * compared a raw 32-bit load (which also covers the
+                        * bytes following the magic) without any byte-order
+                        * conversion.
+                        */
+                       ddq = (xfs_disk_dquot_t *)p;
+                       printf("        DQUOT Buffer:\n");
+                       if (!print_buffer) continue;
+                       /* sizeof makes the sum a size_t; %x needs a uint */
+                       printf("                UIDs 0x%x-0x%x\n", 
+                              INT_GET(ddq->d_id, ARCH_CONVERT),
+                              (uint)(INT_GET(ddq->d_id, ARCH_CONVERT) +
+                              (BBTOB(f->blf_len) / sizeof(xfs_dqblk_t)) - 1));
+               } else {
+                       printf("        BUF DATA\n");
+                       if (!print_buffer) continue;
+                       xlog_recover_print_data(p, len);
+               }
+       }
+} /* xlog_recover_print_buffer */
+
+/*
+ * Print a QUOTAOFF log item: its region count and which quota types
+ * (user and/or project) the qf_flags field names.
+ */
+STATIC void
+xlog_recover_print_quotaoff(
+       xlog_recover_item_t *item)
+{
+       xfs_qoff_logformat_t *qoff_f;
+       /*
+        * Start from an empty string and leave room for both labels plus
+        * the terminator.  The old char[20] was left uninitialised when the
+        * user-quota flag was clear and overflowed when both flags were set.
+        */
+       char str[32] = "";
+
+       qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr;
+       ASSERT(qoff_f);
+       if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 
+               strcat(str, "USER QUOTA");
+       if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+               strcat(str, "PROJ QUOTA");
+       printf("\tQUOTAOFF: #regs:%d   type:%s\n",
+              qoff_f->qf_size, str);
+}
+
+
+/*
+ * Print a DQUOT log item.  Region 0 is the dquot log format header and
+ * region 1 the logged disk dquot.  The one-line summary is always printed;
+ * the dquot contents only when the global print_quota flag is set.
+ */
+STATIC void
+xlog_recover_print_dquot(
+       xlog_recover_item_t *item)
+{
+       xfs_dq_logformat_t      *f;
+       xfs_disk_dquot_t        *d;
+       int                     blk_hard, blk_soft, ino_hard, ino_soft;
+       int                     bcount, icount, btimer, itimer;
+
+       f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr;
+       ASSERT(f);
+       ASSERT(f->qlf_len == 1);
+       d = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
+
+       printf("\tDQUOT: #regs:%d  blkno:%Ld  boffset:%u id: %d\n",
+              f->qlf_size, f->qlf_blkno, f->qlf_boffset, f->qlf_id);
+
+       if (print_quota) {
+               printf("\t\tmagic 0x%x\tversion 0x%x\tID 0x%x (%d)\t\n",
+                      INT_GET(d->d_magic, ARCH_CONVERT),
+                      INT_GET(d->d_version, ARCH_CONVERT),
+                      INT_GET(d->d_id, ARCH_CONVERT),
+                      INT_GET(d->d_id, ARCH_CONVERT));
+
+               /* limits, truncated to int as before */
+               blk_hard = (int)INT_GET(d->d_blk_hardlimit, ARCH_CONVERT);
+               blk_soft = (int)INT_GET(d->d_blk_softlimit, ARCH_CONVERT);
+               ino_hard = (int)INT_GET(d->d_ino_hardlimit, ARCH_CONVERT);
+               ino_soft = (int)INT_GET(d->d_ino_softlimit, ARCH_CONVERT);
+               printf("\t\tblk_hard 0x%x\tblk_soft 0x%x\tino_hard 0x%x"
+                      "\tino_soft 0x%x\n",
+                      blk_hard, blk_soft, ino_hard, ino_soft);
+
+               /* usage counters, shown in hex and decimal */
+               bcount = (int)INT_GET(d->d_bcount, ARCH_CONVERT);
+               icount = (int)INT_GET(d->d_icount, ARCH_CONVERT);
+               printf("\t\tbcount 0x%x (%d) icount 0x%x (%d)\n",
+                      bcount, bcount, icount, icount);
+
+               btimer = (int)INT_GET(d->d_btimer, ARCH_CONVERT);
+               itimer = (int)INT_GET(d->d_itimer, ARCH_CONVERT);
+               printf("\t\tbtimer 0x%x itimer 0x%x \n", btimer, itimer);
+       }
+}
+
+/*
+ * Print the logged inode core.  The "CORE inode:" heading is always
+ * emitted; the individual fields follow only when the global print_inode
+ * flag is set.
+ */
+STATIC void
+xlog_recover_print_inode_core(
+       xfs_dinode_core_t *di)
+{
+       printf("        CORE inode:\n");
+
+       if (print_inode) {
+               /* the two magic bytes are shown as characters */
+               printf("                magic:%c%c  mode:0x%x  ver:%d  format:%d  "
+                      "onlink:%d\n",
+                      (di->di_magic>>8) & 0xff, di->di_magic & 0xff,
+                      di->di_mode, di->di_version, di->di_format,
+                      di->di_onlink);
+               printf("                uid:%d  gid:%d  nlink:%d projid:%d\n",
+                      di->di_uid, di->di_gid, di->di_nlink,
+                      (uint)di->di_projid);
+               printf("                atime:%d  mtime:%d  ctime:%d\n",
+                      di->di_atime.t_sec, di->di_mtime.t_sec,
+                      di->di_ctime.t_sec);
+               printf("                size:0x%Lx  nblks:0x%Lx  exsize:%d  nextents:%d"
+                      "  anextents:%d\n",
+                      di->di_size, di->di_nblocks, di->di_extsize,
+                      di->di_nextents, (int)di->di_anextents);
+               printf("                forkoff:%d  dmevmask:0x%x  dmstate:%d  flags:0x%x  "
+                      "gen:%d\n",
+                      (int)di->di_forkoff, di->di_dmevmask,
+                      (int)di->di_dmstate, (int)di->di_flags, di->di_gen);
+       }
+} /* xlog_recover_print_inode_core */
+
+
+/*
+ * Print the banner for one logged inode fork region and, when both the
+ * print_inode and print_data flags are set, hex-dump region idx of item.
+ */
+STATIC void
+xlog_recover_print_inode_fork(
+       xlog_recover_item_t *item,
+       int                 idx,
+       const char          *banner)
+{
+       printf("%s", banner);
+       if (!print_inode || !print_data)
+               return;
+       xlog_recover_print_data(item->ri_buf[idx].i_addr,
+                               item->ri_buf[idx].i_len);
+}
+
+/*
+ * Print a logged inode item.  Region 0 is the inode log format header,
+ * region 1 the inode core.  Depending on ilf_fields an optional data fork
+ * region (index 2) and an optional attribute fork region (index 2, or 3
+ * when a data fork region is present) follow.
+ */
+STATIC void
+xlog_recover_print_inode(
+       xlog_recover_item_t *item)
+{
+       xfs_inode_log_format_t  *f;
+       int                     attr_index;
+       int                     hasdata;
+       int                     hasattr;
+
+       f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr;
+       ASSERT(item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t));
+       printf("        INODE: #regs:%d   ino:0x%Lx  flags:0x%x   dsize:%d\n",
+              f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize);
+
+       /* the inode core is always the second region */
+       ASSERT(item->ri_buf[1].i_len == sizeof(xfs_dinode_core_t));
+       xlog_recover_print_inode_core((xfs_dinode_core_t *)
+                                     item->ri_buf[1].i_addr);
+
+       hasdata = (f->ilf_fields & XFS_ILOG_DFORK) != 0;
+       hasattr = (f->ilf_fields & XFS_ILOG_AFORK) != 0;
+
+       /* what, if anything, was logged for the data fork */
+       switch (f->ilf_fields & (XFS_ILOG_DFORK | XFS_ILOG_DEV | XFS_ILOG_UUID)) {
+       case XFS_ILOG_DEXT:
+               ASSERT(f->ilf_size == 3 + hasattr);
+               xlog_recover_print_inode_fork(item, 2,
+                       "          DATA FORK EXTENTS inode data:\n");
+               break;
+       case XFS_ILOG_DBROOT:
+               ASSERT(f->ilf_size == 3 + hasattr);
+               xlog_recover_print_inode_fork(item, 2,
+                       "          DATA FORK BTREE inode data:\n");
+               break;
+       case XFS_ILOG_DDATA:
+               ASSERT(f->ilf_size == 3 + hasattr);
+               xlog_recover_print_inode_fork(item, 2,
+                       "          DATA FORK LOCAL inode data:\n");
+               break;
+       case XFS_ILOG_DEV:
+               ASSERT(f->ilf_size == 2 + hasattr);
+               printf("          DEV inode: no extra region\n");
+               break;
+       case XFS_ILOG_UUID:
+               ASSERT(f->ilf_size == 2 + hasattr);
+               printf("          UUID inode: no extra region\n");
+               break;
+       case 0:
+               ASSERT(f->ilf_size == 2 + hasattr);
+               break;
+       default:
+               xlog_panic("xlog_print_trans_inode: illegal inode type");
+       }
+
+       if (!hasattr)
+               return;
+
+       /* the attribute fork region follows the data fork region, if any */
+       attr_index = 2 + hasdata;
+       switch (f->ilf_fields & XFS_ILOG_AFORK) {
+       case XFS_ILOG_AEXT:
+               ASSERT(f->ilf_size == 3 + hasdata);
+               xlog_recover_print_inode_fork(item, attr_index,
+                       "          ATTR FORK EXTENTS inode data:\n");
+               break;
+       case XFS_ILOG_ABROOT:
+               ASSERT(f->ilf_size == 3 + hasdata);
+               xlog_recover_print_inode_fork(item, attr_index,
+                       "          ATTR FORK BTREE inode data:\n");
+               break;
+       case XFS_ILOG_ADATA:
+               ASSERT(f->ilf_size == 3 + hasdata);
+               xlog_recover_print_inode_fork(item, attr_index,
+                       "          ATTR FORK LOCAL inode data:\n");
+               break;
+       default:
+               xlog_panic("xlog_print_trans_inode: "
+                          "illegal inode log flag");
+       }
+} /* xlog_recover_print_inode */
+
+
+/*
+ * Print an EFD log item: its region count, extent count and EFI id,
+ * followed by every extent as (start, length), four extents per line.
+ */
+STATIC void
+xlog_recover_print_efd(
+       xlog_recover_item_t *item)
+{
+       xfs_efd_log_format_t *f;
+       xfs_extent_t     *ex;
+       int                      i;
+
+       f = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr;
+       /*
+        * An xfs_efd_log_format structure contains a variable length array
+        * as the last field.  Each element is of size xfs_extent_t.
+        */
+       ASSERT(item->ri_buf[0].i_len == 
+              sizeof(xfs_efd_log_format_t) + sizeof(xfs_extent_t) *
+              (f->efd_nextents-1));
+       printf("        EFD:  #regs: %d    num_extents: %d  id: 0x%Lx\n",
+              f->efd_size, f->efd_nextents, f->efd_efi_id);
+       ex = f->efd_extents;
+       printf("        ");
+       /*
+        * Walk the extent array by its element count.  efd_size is the
+        * number of log regions, not the number of extents, so using it as
+        * the bound (as before) printed the wrong number of entries.
+        */
+       for (i=0; i < f->efd_nextents; i++) {
+               printf("(s: 0x%Lx, l: %d) ", ex->ext_start, ex->ext_len);
+               if (i % 4 == 3)
+                       printf("\n");
+               ex++;
+       }
+       /* finish a partially filled last line */
+       if (i % 4 != 0) printf("\n");
+} /* xlog_recover_print_efd */
+
+
+/*
+ * Print an EFI log item: its region count, extent count and id, followed
+ * by every extent as (start, length), four extents per line.
+ */
+STATIC void
+xlog_recover_print_efi(
+       xlog_recover_item_t *item)
+{
+       xfs_efi_log_format_t *f =
+               (xfs_efi_log_format_t *)item->ri_buf[0].i_addr;
+       xfs_extent_t     *ex;
+       int                      i;
+
+       /*
+        * The format structure ends in a variable length array of
+        * xfs_extent_t; the region must be exactly large enough for it.
+        */
+       ASSERT(item->ri_buf[0].i_len == 
+              sizeof(xfs_efi_log_format_t) + sizeof(xfs_extent_t) *
+              (f->efi_nextents-1));
+
+       printf("        EFI:  #regs:%d    num_extents:%d  id:0x%Lx\n",
+              f->efi_size, f->efi_nextents, f->efi_id);
+       printf("        ");
+       for (i = 0, ex = f->efi_extents; i < f->efi_nextents; i++, ex++) {
+               printf("(s: 0x%Lx, l: %d) ", ex->ext_start, ex->ext_len);
+               /* break the line after every fourth extent */
+               if (i % 4 == 3)
+                       printf("\n");
+       }
+       /* finish a partially filled last line */
+       if (i % 4 != 0)
+               printf("\n");
+} /* xlog_recover_print_efi */
+
+/*
+ * Dispatch a recovered log item to the printer for its type.  Unknown
+ * types produce a one-line diagnostic on stdout.
+ */
+void
+xlog_recover_print_logitem(
+       xlog_recover_item_t *item)
+{
+       switch (ITEM_TYPE(item)) {
+       case XFS_LI_BUF:
+       case XFS_LI_6_1_BUF:
+       case XFS_LI_5_3_BUF:
+               xlog_recover_print_buffer(item);
+               return;
+
+       case XFS_LI_INODE:
+       case XFS_LI_6_1_INODE:
+       case XFS_LI_5_3_INODE:
+               xlog_recover_print_inode(item);
+               return;
+
+       case XFS_LI_EFD:
+               xlog_recover_print_efd(item);
+               return;
+
+       case XFS_LI_EFI:
+               xlog_recover_print_efi(item);
+               return;
+
+       case XFS_LI_DQUOT:
+               xlog_recover_print_dquot(item);
+               return;
+
+       case XFS_LI_QUOTAOFF:
+               xlog_recover_print_quotaoff(item);
+               return;
+
+       default:
+               printf("xlog_recover_print_logitem: illegal type\n");
+               return;
+       }
+} /* xlog_recover_print_logitem */
+
+/*
+ * Print one recovered item: a short type tag, the region counters, the
+ * address and length of every region, and then the type-specific details
+ * via xlog_recover_print_logitem().
+ */
+void
+xlog_recover_print_item(xlog_recover_item_t *item)
+{
+       const char      *tag = NULL;
+       int             region;
+
+       switch (ITEM_TYPE(item)) {
+       case XFS_LI_BUF:        tag = "BUF";            break;
+       case XFS_LI_INODE:      tag = "INO";            break;
+       case XFS_LI_EFD:        tag = "EFD";            break;
+       case XFS_LI_EFI:        tag = "EFI";            break;
+       case XFS_LI_6_1_BUF:    tag = "6.1 BUF";        break;
+       case XFS_LI_5_3_BUF:    tag = "5.3 BUF";        break;
+       case XFS_LI_6_1_INODE:  tag = "6.1 INO";        break;
+       case XFS_LI_5_3_INODE:  tag = "5.3 INO";        break;
+       case XFS_LI_DQUOT:      tag = "DQ ";            break;
+       case XFS_LI_QUOTAOFF:   tag = "QOFF";           break;
+       default:
+               cmn_err(CE_PANIC, "xlog_recover_print_item: illegal type");
+               break;
+       }
+       if (tag != NULL)
+               printf("%s", tag);
+
+       /* ri_type is not filled in yet, so it is not printed here */
+       printf(": cnt:%d total:%d ", item->ri_cnt, item->ri_total);
+       for (region = 0; region < item->ri_cnt; region++) {
+               printf("a:%p len:%d ",
+                      item->ri_buf[region].i_addr,
+                      item->ri_buf[region].i_len);
+       }
+       printf("\n");
+       xlog_recover_print_logitem(item);
+}      /* xlog_recover_print_item */
+
+/*
+ * Print a whole recovered transaction: a separator line, the transaction
+ * header, then every item on the circular ri_next list starting at itemq.
+ * Does nothing when print is below 3.
+ */
+void
+xlog_recover_print_trans(xlog_recover_t             *trans,
+                        xlog_recover_item_t *itemq,
+                        int                 print)
+{
+       xlog_recover_item_t *cur = itemq;
+
+       if (print < 3)
+               return;
+
+       print_xlog_record_line();
+       xlog_recover_print_trans_head(trans);
+
+       /* the list is circular: stop once we are back at the head */
+       for (;;) {
+               xlog_recover_print_item(cur);
+               cur = cur->ri_next;
+               if (cur == itemq)
+                       break;
+       }
+}      /* xlog_recover_print_trans */
diff --git a/logprint/log_print_trans.c b/logprint/log_print_trans.c
new file mode 100644 (file)
index 0000000..9b83046
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+
+/*
+ * Print the one-line summary of a recovered transaction: log tid, type
+ * name, item count, transaction header tid and item queue pointer.
+ */
+void
+xlog_recover_print_trans_head(
+        xlog_recover_t *tr)
+{
+        /* NOTE(review): th_type indexes trans_type[] unchecked - confirm it is range-validated elsewhere */
+        printf("TRANS: tid:0x%x  type:%s  #items:%d  trans:0x%x  q:%p\n",
+               tr->r_log_tid, trans_type[tr->r_theader.th_type],
+               tr->r_theader.th_num_items,
+               tr->r_theader.th_tid, tr->r_itemq);
+}       /* xlog_recover_print_trans_head */
+
+/*
+ * Handle one recovered transaction by printing it; nothing else is done
+ * here.  The log and pass arguments are unused.  Always returns 0.
+ */
+int
+xlog_recover_do_trans(xlog_t        *log,
+                     xlog_recover_t *trans,
+                     int            pass)
+{
+       /* 3 is the lowest level at which xlog_recover_print_trans() prints */
+       xlog_recover_print_trans(trans, trans->r_itemq, 3);
+       return 0;
+}      /* xlog_recover_do_trans */
+
+/* set by xfs_log_print_trans(); makes xlog_header_check_recover() print each record's LSN */
+static int print_record_header=0;
+
+/*
+ * Print the log in transaction form.  Locates the head and tail of the
+ * log, reports them (optionally overriding the tail with
+ * print_block_start when it is not -1) and then runs a pass-1 recovery
+ * scan.  Exits with status 1 if either step fails.
+ */
+void
+xfs_log_print_trans(xlog_t      *log,
+                   int         print_block_start)
+{
+       xfs_daddr_t     head_blk, tail_blk;
+       const char      *state;
+
+       if (xlog_find_tail(log, &head_blk, &tail_blk, 0) != 0)
+               exit(1);
+
+       /* head == tail means there is nothing left to replay */
+       state = (tail_blk == head_blk) ? "<CLEAN>" : "<DIRTY>";
+       printf("    log tail: %lld head: %lld state: %s\n",
+              (__int64_t)tail_blk, (__int64_t)head_blk, state);
+
+       if (print_block_start != -1) {
+               printf("    override tail: %lld\n",
+                      (__int64_t)print_block_start);
+               tail_blk = print_block_start;
+       }
+       printf("\n");
+
+       /* from here on the header check callback prints every record's LSN */
+       print_record_header = 1;
+       if (xlog_do_recovery_pass(log, head_blk, tail_blk,
+                                 XLOG_RECOVER_PASS1) != 0)
+               exit(1);
+}      /* xfs_log_print_trans */
+
+/*
+ * Compare the superblock uuid with the uuid stored in a log record header.
+ * Returns 0 when they match; otherwise prints both uuids and returns 1.
+ */
+static int
+header_check_uuid(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    char uu_log[64], uu_sb[64];
+
+    if (uuid_compare(mp->m_sb.sb_uuid, head->h_fs_uuid) != 0) {
+        uuid_unparse(mp->m_sb.sb_uuid, uu_sb);
+        uuid_unparse(head->h_fs_uuid, uu_log);
+
+        printf("* ERROR: mismatched uuid in log\n"
+               "*            SB : %s\n*            log: %s\n",
+               uu_sb, uu_log);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Sanity-check a log record header during the recovery pass: magic number,
+ * filesystem uuid and log format, in that order, stopping at the first
+ * failure.  Optionally prints the record's LSN first.  A failed check is
+ * fatal only when print_exit is set; the function itself always returns 0.
+ */
+int
+xlog_header_check_recover(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    int bad = 0;
+
+    if (print_record_header) {
+        printf("\nLOG REC AT LSN cycle %d block %d (0x%x, 0x%x)\n",
+               CYCLE_LSN(head->h_lsn, ARCH_CONVERT),
+               BLOCK_LSN(head->h_lsn, ARCH_CONVERT),
+               CYCLE_LSN(head->h_lsn, ARCH_CONVERT),
+               BLOCK_LSN(head->h_lsn, ARCH_CONVERT));
+    }
+
+    if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) {
+        printf("* ERROR: bad magic number in log header: 0x%x\n",
+               INT_GET(head->h_magicno, ARCH_CONVERT));
+        bad = 1;
+    } else if (header_check_uuid(mp, head)) {
+        /* header_check_uuid() has already reported the mismatch */
+        bad = 1;
+    } else if (INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT) {
+        printf("* ERROR: log format incompatible (log=%d, ours=%d)\n",
+               INT_GET(head->h_fmt, ARCH_CONVERT), XLOG_FMT);
+        bad = 1;
+    }
+
+    /* bail out now or just carry on regardless */
+    if (bad && print_exit)
+        xlog_exit("Bad log");
+    return 0;
+}
+
+/*
+ * Mount-time check of a log record header: a null uuid in the header is
+ * accepted as is, otherwise it must match the superblock uuid.  A mismatch
+ * is fatal only when print_exit is set; the function always returns 0.
+ */
+int
+xlog_header_check_mount(xfs_mount_t *mp, xlog_rec_header_t *head)
+{
+    int mismatch;
+
+    if (uuid_is_null(head->h_fs_uuid))
+        return 0;
+
+    mismatch = header_check_uuid(mp, head);
+    /* bail out now or just carry on regardless */
+    if (mismatch && print_exit)
+        xlog_exit("Bad log");
+    return 0;
+}
diff --git a/logprint/logprint.c b/logprint/logprint.c
new file mode 100644 (file)
index 0000000..16a652d
--- /dev/null
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "logprint.h"
+#include <errno.h>
+#include <fcntl.h>
+
+int    print_data;            /* -o; also set by -D */
+int    print_only_data;       /* -D */
+int    print_inode;           /* -i */
+int    print_quota;           /* -q */
+int    print_buffer;          /* -b */
+int    print_transactions;    /* -t */
+int    print_overwrite;       /* -v */
+int     print_no_data;        /* -n */
+int     print_no_print;       /* not set by any option parsed in main() */
+int     print_exit = 1; /* -e is now default. specify -c to override */
+
+libxfs_init_t  x;
+xfs_mount_t    mp;
+
+/*
+ * Print the command-line synopsis and option summary to stderr, then
+ * terminate the program with exit status 1.  This function never
+ * returns to its caller.
+ */
+void
+usage(void)
+{
+       fprintf(stderr, "Usage: %s [options...] <device>\n\n\
+Options:\n\
+    -c             try to continue if error found in log\n\
+    -l <device>     filename of external log\n\
+    -n             don't try and interpret log data\n\
+    -o             print buffer data in hex\n\
+    -s <start blk>  block # to start printing\n\
+    -v              print \"overwrite\" data\n\
+    -t             print out transactional view\n\
+        -b          in transactional view, extract buffer info\n\
+        -i          in transactional view, extract inode info\n\
+        -q          in transactional view, extract quota info\n\
+    -D              print only data; no decoding\n\
+    -V              print version information\n", 
+        progname);
+       exit(1);
+}
+
+/*
+ * Locate the log for the filesystem named by x->dname.
+ *
+ * Reads the first BBSIZE bytes of the data device, translates them
+ * into the superblock of the global mount structure mp, and from that
+ * fills in x->logBBsize and x->logBBstart.  When an external log name
+ * was supplied it is only checked for being openable; otherwise the
+ * log device is set to the data device.
+ *
+ * Returns 0.  Any failure is reported on stderr and ends the program
+ * with exit status 1 (directly, or through usage()).
+ */
+int
+logstat(libxfs_init_t *x)
+{
+       char            sbbuf[BBSIZE];
+       xfs_sb_t        *sbp = &(mp.m_sb);
+       int             devfd;
+       int             extfd;
+
+       /*
+        * On Linux the superblock is always read: it supplies the
+        * length of the log, and without that we would end up
+        * seeking forever. -- mkp
+        */
+       devfd = open(x->dname, O_RDONLY);
+       if (devfd == -1) {
+               fprintf(stderr, "    Can't open device %s: %s\n",
+                       x->dname, strerror(errno));
+               exit(1);
+       }
+       lseek64(devfd, 0, SEEK_SET);
+       if (read(devfd, sbbuf, sizeof(sbbuf)) != sizeof(sbbuf)) {
+               fprintf(stderr, "    read of XFS superblock failed\n");
+               exit(1);
+       }
+       close(devfd);
+
+       /* conjure up just enough of a mount structure for the macros below */
+       libxfs_xlate_sb(sbbuf, sbp, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+       mp.m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+
+       x->logBBsize = XFS_FSB_TO_BB(&mp, sbp->sb_logblocks);
+       x->logBBstart = XFS_FSB_TO_DADDR(&mp, sbp->sb_logstart);
+
+       /* a zero log start with no log name given leaves nothing to read */
+       if (!x->logname && sbp->sb_logstart == 0) {
+               fprintf(stderr, "    external log device not specified\n\n");
+               usage();
+               /*NOTREACHED*/
+       }
+
+       /* internal log: it lives on the data device */
+       if (!x->logname || !*x->logname) {
+               x->logdev = x->ddev;
+               return 0;
+       }
+
+       /* external log: only confirm that it can be opened */
+       extfd = open(x->logname, O_RDONLY);
+       if (extfd == -1) {
+               fprintf(stderr, "Can't open file %s: %s\n",
+                       x->logname, strerror(errno));
+               exit(1);
+       }
+       close(extfd);
+
+       return 0;
+}
+
+/*
+ * xfs_logprint entry point.
+ *
+ * Parses the command-line flags into the global print_* settings,
+ * initialises libxfs for the single <device> argument, locates the
+ * log with logstat(), and prints it: as a transactional view through
+ * xfs_log_print_trans() when -t was given, record by record through
+ * xfs_log_print() otherwise.
+ *
+ * Exits with status 0 when printing completes or after -V has
+ * reported the version; usage errors and libxfs initialisation
+ * failure exit with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+       int             print_start = -1;
+       int             c;
+       int             logfd;
+       xlog_t          log = {0};
+
+       progname = basename(argv[0]);
+       while ((c = getopt(argc, argv, "bel:iqnors:tDVvc")) != EOF) {
+               switch (c) {
+                       case 'D': {
+                               print_only_data++;
+                               print_data++;
+                               break;
+                       }
+                       case 'b': {
+                               print_buffer++;
+                               break;
+                       }
+                       case 'l': {
+                               x.logname = optarg;
+                               x.lisfile = 1;
+                               break;
+                       }
+                       case 'c': {
+                               /* default is to stop on error.
+                                * -c turns this off.
+                                */
+                               print_exit = 0;
+                               break;
+                       }
+                       case 'e': {
+                               /* -e is now default
+                                */
+                               print_exit++;
+                               break;
+                       }
+                       case 'i': {
+                               print_inode++;
+                               break;
+                       }
+                       case 'q': {
+                               print_quota++;
+                               break;
+                       }
+                       case 'n': {
+                               print_no_data++;
+                               break;
+                       }
+                       case 'o': {
+                               print_data++;
+                               break;
+                       }
+                       case 's': {
+                               print_start = atoi(optarg);
+                               break;
+                       }
+                       case 't': {
+                               print_transactions++;
+                               break;
+                       }
+                       case 'V': {
+                               /*
+                                * A version request is complete once the
+                                * version is printed.  Carrying on would
+                                * reach the device-argument check below and
+                                * end in usage() with a failure status.
+                                */
+                               printf("%s version %s\n", progname, VERSION);
+                               exit(0);
+                       }
+                       case 'v': {
+                               print_overwrite++;
+                               break;
+                       }
+                       case '?': {
+                               usage();
+                               /*NOTREACHED*/
+                       }
+               }
+       }
+
+       /* exactly one non-option argument is expected: the device */
+       if (argc - optind != 1)
+               usage();
+
+       x.dname = argv[optind];
+
+       if (x.dname == NULL)
+               usage();
+
+       x.notvolok = 1;
+       x.isreadonly = LIBXFS_ISINACTIVE;
+       x.notvolmsg = "You should never see this message.\n";
+
+       printf("xfs_logprint:\n");
+       if (!libxfs_init(&x))
+               exit(1);
+
+       logstat(&x);
+
+       /* a negative x.logfd means the log is read through the data fd */
+       logfd = (x.logfd < 0) ? (x.dfd) : (x.logfd);
+
+       /*
+        * "%ll" with an explicit cast is the standard spelling for a
+        * 64-bit integer; the "L" length modifier is only defined for
+        * long double conversions.
+        */
+       printf("    data device: 0x%llx\n", (unsigned long long)x.ddev);
+
+       if (x.logname) {
+               printf("    log file: \"%s\" ", x.logname);
+       } else {
+               printf("    log device: 0x%llx ", (unsigned long long)x.logdev);
+       }
+
+       printf("daddr: %lld length: %lld\n\n",
+               (long long)x.logBBstart, (long long)x.logBBsize);
+
+       ASSERT(x.logBBstart <= INT_MAX);
+
+       /* init log structure */
+       log.l_dev         = x.logdev;
+       log.l_logsize     = BBTOB(x.logBBsize);
+       log.l_logBBstart  = x.logBBstart;
+       log.l_logBBsize   = x.logBBsize;
+       log.l_mp          = &mp;
+       if (print_transactions)
+               xfs_log_print_trans(&log, print_start);
+       else
+               xfs_log_print(&log, logfd, print_start);
+
+       exit(0);
+}
diff --git a/logprint/logprint.h b/logprint/logprint.h
new file mode 100644 (file)
index 0000000..17eb3ba
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef XFS_LOGPRINT_H
+#define XFS_LOGPRINT_H
+
+#include <libxfs.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * define the userlevel xlog_t to be the subset of the kernel's
+ * xlog_t that we actually need to get our work done, avoiding
+ * the need to define any exotic kernel types in userland.
+ */
+typedef struct log {
+       xfs_lsn_t       l_tail_lsn;     /* lsn of 1st LR w/ unflush buffers */
+       xfs_lsn_t       l_last_sync_lsn;/* lsn of last LR on disk */
+       xfs_mount_t     *l_mp;          /* mount point */
+       dev_t           l_dev;          /* dev_t of log */
+       xfs_daddr_t     l_logBBstart;   /* start block of log */
+       int             l_logsize;      /* size of log in bytes */
+       int             l_logBBsize;    /* size of log in 512 byte chunks */
+       int             l_roundoff;     /* round off error of all iclogs */
+       int             l_curr_cycle;   /* Cycle number of log writes */
+       int             l_prev_cycle;   /* Cycle # b4 last block increment */
+       int             l_curr_block;   /* current logical block of log */
+       int             l_prev_block;   /* previous logical block of log */
+       int             l_iclog_size;    /* NOTE(review): presumably size of one iclog buffer in bytes, not of the whole log - confirm */
+       int             l_iclog_size_log;/* NOTE(review): presumably log2 of l_iclog_size - confirm */
+       int             l_iclog_bufs;    /* number of iclog buffers */
+       int             l_grant_reserve_cycle;  /* */
+       int             l_grant_reserve_bytes;  /* */
+       int             l_grant_write_cycle;    /* */
+       int             l_grant_write_bytes;    /* */
+} xlog_t;
+
+#include <xfs_log_recover.h>
+#include <xfs_buf_item.h>
+#include <xfs_inode_item.h>
+#include <xfs_extfree_item.h>
+#include <xfs_dquot_item.h>
+
+
+/*
+ * macros mapping kernel code to user code
+ *
+ * xlog_warn() writes the formatted message followed by a newline to
+ * stderr; cmn_err() discards its severity argument and does the same.
+ * xlog_exit() and xlog_panic() print the message and then call exit(1).
+ *
+ * The buffer macros always operate on the global x.logdev:
+ * xlog_get_bp() ignores its mp argument, and xlog_bread() reads at
+ * l_logBBstart + blkno and always evaluates to 0, whatever
+ * libxfs_readbufr() returned.
+ *
+ * The kmem_* wrappers drop their extra arguments and call calloc(),
+ * free() and realloc() directly.
+ */
+#define STATIC                 static
+#define EFSCORRUPTED            EIO
+#define XFS_ERROR(e)           (e)
+
+#define xlog_warn(fmt,args...) \
+       ( fprintf(stderr,fmt,## args), fputc('\n', stderr) )
+#define cmn_err(sev,fmt,args...) \
+        xlog_warn(fmt,## args)
+#define xlog_exit(fmt,args...) \
+       ( xlog_warn(fmt,## args), exit(1) )
+#define xlog_panic(fmt,args...) \
+       xlog_exit(fmt,## args)
+
+#define xlog_get_bp(nbblks, mp)        libxfs_getbuf(x.logdev, 0, (nbblks))
+#define xlog_put_bp(bp)                libxfs_putbuf(bp)
+#define xlog_bread(log,blkno,nbblks,bp)        \
+       (libxfs_readbufr(x.logdev,      \
+                       (log)->l_logBBstart+(blkno), bp, (nbblks), 1), 0)
+                         
+#define kmem_zalloc(size, foo)                 calloc(size,1)
+#define kmem_free(ptr, foo)                    free(ptr)
+#define kmem_realloc(ptr, len, old, foo)       realloc(ptr, len)
+
+/* command line flags */
+extern int     print_data;
+extern int     print_only_data;
+extern int     print_inode;
+extern int     print_quota;
+extern int     print_buffer;
+extern int     print_transactions;
+extern int     print_overwrite;
+
+extern int     print_exit;
+extern int     print_no_data;
+extern int     print_no_print;
+
+/* exports */
+
+extern char *trans_type[];
+
+/* libxfs parameters */
+extern libxfs_init_t   x;
+
+extern void xfs_log_print_trans(xlog_t          *log,
+                               int             print_block_start);
+
+extern void xfs_log_print(      xlog_t          *log,
+                                int             fd,
+                               int             print_block_start);
+
+extern int  xlog_find_zeroed(xlog_t *log, xfs_daddr_t *blk_no);
+extern int  xlog_find_cycle_start(xlog_t *log, xfs_buf_t *bp,
+               xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle);
+extern int  xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk,
+               xfs_daddr_t *tail_blk, int readonly);
+
+extern int  xlog_test_footer(xlog_t *log);
+extern int  xlog_recover(xlog_t *log, int readonly);
+extern void xlog_recover_print_data(xfs_caddr_t p, int len);
+extern void xlog_recover_print_logitem(xlog_recover_item_t *item);
+extern void xlog_recover_print_trans_head(xlog_recover_t *tr);
+extern int  xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk);
+
+extern void print_xlog_op_line(void);
+extern void print_xlog_record_line(void);
+extern void print_stars(void);
+
+/* for transactional view */
+extern void xlog_recover_print_trans_head(xlog_recover_t *tr);
+
+extern void xlog_recover_print_trans(  xlog_recover_t          *trans,
+                                       xlog_recover_item_t     *itemq,
+                                       int                     print);
+
+extern int  xlog_do_recovery_pass(     xlog_t          *log,
+                                       xfs_daddr_t     head_blk,
+                                       xfs_daddr_t     tail_blk,
+                                       int             pass);
+extern int  xlog_recover_do_trans(     xlog_t          *log,
+                                       xlog_recover_t  *trans,
+                                       int             pass);
+extern int  xlog_header_check_recover(  xfs_mount_t         *mp, 
+                                        xlog_rec_header_t   *head);
+extern int  xlog_header_check_mount(    xfs_mount_t         *mp, 
+                                        xlog_rec_header_t   *head);
+
+#endif /* XFS_LOGPRINT_H */
diff --git a/man/Makefile b/man/Makefile
new file mode 100644 (file)
index 0000000..139d5ae
--- /dev/null
@@ -0,0 +1,41 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+SUBDIRS = man1 man2 man3 man5 man8
+
+default install : $(SUBDIRS)
+       $(SUBDIRS_MAKERULE)
+
+include $(BUILDRULES)
diff --git a/man/man5/Makefile b/man/man5/Makefile
new file mode 100644 (file)
index 0000000..8602606
--- /dev/null
@@ -0,0 +1,49 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+MAN_SECTION    = 5
+
+MAN_PAGES      = $(shell echo *.$(MAN_SECTION))
+MAN_DEST       = $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION)
+LSRCFILES      = $(MAN_PAGES)
+
+
+default : $(MAN_PAGES)
+
+include $(BUILDRULES)
+
+install : default
+       $(INSTALL) -m 755 -d $(MAN_DEST)
+       $(INSTALL_MAN)
diff --git a/man/man5/xfs.5 b/man/man5/xfs.5
new file mode 100644 (file)
index 0000000..a358f4a
--- /dev/null
@@ -0,0 +1,114 @@
+.TH xfs 5
+.SH NAME
+xfs \- layout of the XFS filesystem
+.SH DESCRIPTION
+An XFS filesystem can reside on a regular disk partition or on a
+logical volume (see
+.IR lvm (8)).
+An XFS filesystem has up to three parts:
+a data section, a log section, and a real-time section.
+For disk partition filesystems,
+the real-time section is absent, and
+the log area is contained within the data section.
+For logical volume filesystems,
+the real-time section is optional,
+and the log section can be separate from the data section
+or contained within it.
+The filesystem sections are divided into a certain number of
+.IR blocks ,
+whose size is specified at
+.IR mkfs (8)
+time with the
+.B \-b
+option.
+.PP
+The data section contains all the filesystem metadata
+(inodes, directories, indirect blocks)
+as well as the user file data for ordinary (non-real-time) files
+and the log area if the log is
+.I internal
+to the data section.
+The data section is divided into a number of
+\f2allocation groups\f1.
+The number and size of the allocation groups are chosen by
+.I mkfs
+so that there is normally a small number of equal-sized groups.
+The number of allocation groups controls the amount of parallelism
+available in file and block allocation.
+It should be increased from
+the default if there is sufficient memory and a lot of allocation
+activity.
+The number of allocation groups should not be set very high,
+since this can cause large amounts of CPU time to be used by
+the filesystem, especially when the filesystem is nearly full.
+More allocation groups are added (of the original size) when
+.IR xfs_growfs (8)
+is run.
+.PP
+The log section (or area, if it is internal to the data section)
+is used to store changes to filesystem metadata while the
+filesystem is running until those changes are made to the data
+section.
+It is written sequentially during normal operation and read only
+during mount.
+When mounting a filesystem after a crash, the log
+is read to complete operations that were
+in progress at the time of the crash.
+.PP
+The real-time section is used to store the data of real-time files.
+These files had an attribute bit set through
+.IR fcntl (2)
+after file creation, before any data was written to the file.
+The real-time section is divided into a number of
+.I extents
+of fixed size (specified at
+.I mkfs
+time).
+Each file in the real-time section has an extent size that
+is a multiple of the real-time section extent size.
+.PP
+Each allocation group contains several data structures.
+The first sector contains the superblock.
+For allocation groups after the first,
+the superblock is just a copy and is not updated after
+.IR mkfs .
+The next three sectors contain information for block and inode
+allocation within the allocation group.
+Also contained within each allocation group are data structures
+to locate free blocks and inodes;
+these are located through the header structures.
+.PP
+Each XFS filesystem is labeled with a unique
+universal identifier (UUID).
+The UUID is stored in every allocation group header and
+is used to help distinguish one XFS filesystem from another,
+therefore you should avoid using
+.I dd
+or other block-by-block copying programs to copy XFS filesystems.
+If two XFS filesystems on the same machine have the same UUID,
+.I xfsdump
+may become confused when doing incremental and resumed dumps.
+(See
+.IR xfsdump (8)
+for more details.)
+.I xfs_copy
+or
+.IR xfsdump / xfsrestore
+are recommended for making copies of XFS filesystems.
+.PP
+All these data structures are subject to change, and the
+headers that specify their layout on disk are not provided.
+.SH SEE ALSO
+fs(5),
+mkfs.xfs(8),
+xfs_bmap(8),
+xfs_check(8),
+xfs_copy(8),
+xfs_estimate(8),
+xfs_growfs(8),
+xfs_logprint(8),
+xfs_repair(8),
+xfsdump(8),
+xfsrestore(8),
+fcntl(2),
+lvm(8).
diff --git a/man/man8/Makefile b/man/man8/Makefile
new file mode 100644 (file)
index 0000000..9ccd9c4
--- /dev/null
@@ -0,0 +1,49 @@
+#! gmake
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/include/builddefs
+
+MAN_SECTION    = 8
+
+MAN_PAGES      = $(shell echo *.$(MAN_SECTION))
+MAN_DEST       = $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION)
+LSRCFILES      = $(MAN_PAGES)
+
+default : $(MAN_PAGES)
+
+include $(BUILDRULES)
+
+install : default
+       $(INSTALL) -m 755 -d $(MAN_DEST)
+       $(INSTALL_MAN)
diff --git a/man/man8/fsck.xfs.8 b/man/man8/fsck.xfs.8
new file mode 100644 (file)
index 0000000..128691b
--- /dev/null
@@ -0,0 +1,23 @@
+.TH fsck.xfs 8
+.SH NAME
+fsck.xfs \- do nothing, successfully
+.SH SYNOPSIS
+.nf
+\f3fsck.xfs\f1 [ \f3...\f1]
+.fi
+.SH DESCRIPTION
+.I fsck.xfs
+is called by the generic Linux
+.IR fsck (8)
+program at startup to check and repair an XFS filesystem.
+XFS is a journalled filesystem and performs recovery at
+.IR mount (8)
+time if necessary, so
+.I fsck.xfs
+simply exits with a zero exit status.
+.SH FILES
+.IR /etc/fstab .
+.SH SEE ALSO
+fsck(8),
+fstab(5),
+xfs(5).
diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8
new file mode 100644 (file)
index 0000000..7366bcf
--- /dev/null
@@ -0,0 +1,485 @@
+.TH mkfs.xfs 8
+.SH NAME
+mkfs.xfs \- construct an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3mkfs.xfs\f1 [ \f3\-b\f1 subopt=value ] \c
+[ \f3\-d\f1 subopt[=value] ] [ \f3\-i\f1 subopt=value ]
+       [ \f3\-l\f1 subopt[=value] ] \c
+[ \f3\-n\f1 subopt[=value] ] \c
+[ \f3\-p\f1 protofile ] [ \f3\-q\f1 ] 
+       [ \f3\-r\f1 subopt[=value] ] [ \f3\-C\f1 ] device
+.fi
+.SH DESCRIPTION
+.I mkfs.xfs
+constructs an XFS filesystem by writing on a special
+file using the values found in the arguments of the command line.
+It is invoked automatically by \f2mkfs\f1(8) when \f2mkfs\f1 is
+given the \f3\-t xfs\f1 option.
+.PP
+In its simplest (and most commonly used) form, the size of the
+filesystem is determined from the disk driver.  As an example, to make
+a filesystem with an internal log on the first partition on the first
+SCSI disk, use:
+.PP
+.nf
+       mkfs.xfs /dev/sda1
+.fi
+.PP
+The metadata log can be placed on another device to reduce the number
+of disk seeks.  To create a filesystem on the first partition on the
+first SCSI disk with a 10000 block log located on the first partition
+on the second SCSI disk, use:
+.PP
+.nf
+       mkfs.xfs -l logdev=/dev/sdb1,size=10000b /dev/sda1
+.fi
+.PP
+Each of the
+.I subopt=value
+elements in the argument list above can be given as multiple comma-separated
+.I subopt=value
+suboptions if multiple suboptions apply to the same option.
+Equivalently, each main option can be given multiple times with
+different suboptions.
+For example,
+.B \-l internal,size=10000b
+and
+.B \-l internal \-l size=10000b
+are equivalent.
+.PP
+In the descriptions below, sizes are given in bytes, blocks, kilobytes,
+megabytes, or gigabytes.
+Sizes are treated as hexadecimal if prefixed by 0x or 0X,
+octal if prefixed by 0, or decimal otherwise.
+If suffixed with \f3b\f1 then the size is converted by multiplying it
+by the filesystem's block size.
+If suffixed with \f3k\f1 then the size is converted by multiplying it by 1024.
+If suffixed with \f3m\f1 then the size is converted by multiplying it by
+1048576 (1024 * 1024).
+If suffixed with \f3g\f1 then the size is converted by multiplying it by
+1073741824 (1024 * 1024 * 1024).
+.TP
+.B \-b
+Block size options.
+.IP
+This option specifies the fundamental block size of the filesystem.
+The valid suboptions are:
+.BI log= value
+and
+\f3size=\f1\f2value\f1;
+only one can be supplied.
+The block size is specified either as a base two logarithm value with
+.BR log= ,
+or in bytes with
+.BR size= .
+The default value is 4096 bytes (4 KB).  The minimum value for block
+size is 512; the maximum is 65536 (64 KB).  XFS on Linux currently
+only supports 4KB blocks.
+.TP
+.B \-d
+Data section options.
+.IP
+These options specify the location, size, and other parameters of the
+data section of the filesystem.
+The valid suboptions are:
+\f3agcount=\f1\f2value\f1,
+\f3file\f1[\f3=\f1\f2value\f1],
+\f3name=\f1\f2value\f1,
+\f3size=\f1\f2value\f1,
+\f3sunit=\f1\f2value\f1,
+\f3swidth=\f1\f2value\f1,
+and
+\f3unwritten\f1[\f3=\f1\f2value\f1].
+.IP
+The
+.B agcount
+suboption is used to specify the number of allocation groups.
+The data section of the filesystem is divided into allocation groups
+to improve the performance of XFS.
+More allocation groups imply that more parallelism can be achieved
+when allocating blocks and inodes.
+The minimum allocation group size is 16 MB;
+the maximum size is just under 4 GB.
+The data section of the filesystem is divided into
+.I agcount
+allocation groups (default value 8, unless the filesystem is smaller
+than 128 MB or larger than 8 GB).
+Setting
+.I agcount
+to a very large number should be avoided, since this causes an unreasonable
+amount of CPU time to be used when the filesystem is close to full.
+.IP
+The
+.B name
+suboption can be used to specify the name of the special file containing
+the filesystem.
+In this case, the log section must be specified as
+.B internal
+(with a size, see the
+.B \-l
+option below) and there can be no real-time section.
+Note that the default log in this case is an internal log with
+at least 1000 blocks, actual size depending on the filesystem block
+size and the directory block size.
+.IP
+The
+.B file
+suboption is used to specify that the file given by the
+.B name
+suboption is a regular file.
+The suboption value is either 0 or 1,
+with 1 signifying that the file is regular.
+This suboption is used only to make a filesystem image
+(for instance, a miniroot image).
+If the value is omitted then 1 is assumed.
+.IP
+The
+.B size
+suboption is used to specify the size of the data section.
+This suboption is required if
+.B \-d file[=1]
+is given.
+Otherwise, it is only needed if the filesystem should occupy
+less space than the size of the special file.
+.IP
+The
+.B sunit
+suboption is used to specify the stripe unit for a RAID device or a
+logical volume.  The suboption value has to be specified in 512-byte
+block units.  This suboption ensures that data allocations will be
+stripe unit aligned when the current end of file is being extended and
+the file size is larger than 512KB.  Also inode allocations and the
+internal log will be stripe unit aligned.
+.IP
+The
+.B swidth
+suboption is used to specify the stripe width for a RAID device or a
+striped logical volume.
+The suboption value has to be specified in 512-byte block units.
+This suboption is required if
+.B \-d sunit
+has been specified and it has to be a multiple of the 
+.B \-d sunit 
+suboption.
+The stripe width will be the preferred iosize returned in the 
+.IR stat (2)
+system call.
+.IP
+The
+.B unwritten
+suboption is used to specify whether unwritten extents are flagged as such,
+or not.
+The suboption value is either 0 or 1, with 1 signifying that unwritten
+extent flagging should occur.
+If the suboption is omitted, unwritten extent flagging is enabled.
+If unwritten extents are flagged, filesystem write performance
+will be negatively affected for preallocated file extents, since
+extra filesystem transactions are required to convert extent flags 
+for the range of the file written.
+This suboption should be disabled if the filesystem
+needs to be used on operating system versions which do not support the
+flagging capability.
+.TP
+.B \-i
+Inode options.
+.IP
+This option specifies the inode size of the filesystem, and other
+inode allocation parameters.
+The XFS inode contains a fixed-size part and a variable-size part.
+The variable-size part, whose size is affected by this option, can contain:
+directory data, for small directories;
+attribute data, for small attribute sets;
+symbolic link data, for small symbolic links;
+the extent list for the file, for files with a small number of extents;
+and the root of a tree describing the location of extents for the file,
+for files with a large number of extents.
+.IP
+The valid suboptions for specifying inode size are:
+\f3log=\f1\f2value\f1,
+\f3perblock=\f1\f2value\f1,
+and
+\f3size=\f1\f2value\f1;
+only one can be supplied.
+The inode size is specified either as a base two logarithm value with
+.BR log= ,
+in bytes with
+.BR size= ,
+or as the number fitting in a filesystem block with
+.BR perblock= .
+The minimum (and default) value is 256 bytes.
+The maximum value is 2048 (2 KB) subject to the restriction that
+the inode size cannot exceed one half of the filesystem block size.
+.IP
+The option \f3maxpct=\f1\f2value\f1 specifies the maximum percentage
+of space in the filesystem that can be allocated to inodes.
+The default value is 25%.
+Setting the value to 0 means that
+essentially all of the filesystem can become inode blocks.
+.IP
+The option
+.BI align[= value ]
+is used to specify that inode allocation is or is not aligned.
+The value is either 0 or 1,
+with 1 signifying that inodes are allocated aligned.
+If the value is omitted, 1 is assumed.
+The default is that inodes are aligned.
+Aligned inode access is normally more efficient than unaligned access;
+alignment must be established at the time the filesystem is created,
+since inodes are allocated at that time.
+This option can be used to turn off inode alignment when the
+filesystem needs to be mountable by a version of IRIX
+that does not have the inode alignment feature
+(any release of IRIX before 6.2, and IRIX 6.2 without XFS patches).
+.TP
+.B \-l
+Log section options.
+.IP
+These options specify the location, size, and other parameters of the
+log section of the filesystem.
+The valid suboptions are:
+.BI internal[= value ]
+and
+\f3size=\f1\f2value\f1.
+.IP
+The
+.B internal
+suboption is used to specify that the log section is a piece of
+the data section instead of being another device or logical volume.
+The suboption value is either 0 or 1,
+with 1 signifying that the log is internal.
+If the value is omitted, 1 is assumed.
+.IP
+The
+.B size
+suboption is used to specify the size of the log section.
+This suboption is required if
+.B \-l internal[=1]
+is given.
+Otherwise, it is only needed if the log section of the filesystem
+should occupy less space than the size of the special file.
+The size is specified in bytes or blocks, with a \f3b\f1 suffix 
+meaning multiplication by the filesystem block size, as described above.
+The overriding minimum value for size is 512 blocks.
+With some combinations of filesystem block size, inode size,
+and directory block size, the minimum log size is larger than 512 blocks.
+.TP
+.B \-n
+Naming options.
+.IP
+These options specify the version and size parameters for the naming
+(directory) area of the filesystem.
+The valid suboptions are:
+\f3log=\f1\f2value\f1,
+\f3size=\f1\f2value\f1,
+and
+\f3version=\f1\f2value\f1.
+The naming (directory) version is 1 or 2,
+defaulting to 1 if unspecified.
+With version 2 directories,
+the directory block size can be any power of 2 size
+from the filesystem block size up to 65536.
+The block size is specified either as a base two logarithm value with
+.BR log= ,
+or in bytes with
+.BR size= .
+The default size value for version 2 directories is 4096 bytes (4 KB), 
+unless the filesystem block size is larger than 4096,
+in which case the default value is the filesystem block size.
+For version 1 directories the block size is the same as the 
+filesystem block size.
+.TP
+\f3\-p\f1 \f2protofile\f1
+If the optional
+.B \-p
+.I protofile
+argument is given,
+.I mkfs.xfs
+uses
+.I protofile
+as a prototype file
+and takes its directions from that file.
+The blocks and inodes
+specifiers in the
+.I protofile
+are provided for backwards compatibility, but are otherwise unused.
+The prototype file
+contains tokens separated by spaces or
+newlines.
+A sample prototype specification follows (line numbers have been added to
+aid in the explanation):
+.nf
+.sp .8v
+.in +5
+\f71       /stand/\f1\f2diskboot\f1\f7
+2       4872 110
+3       d--777 3 1
+4       usr     d--777 3 1
+5       sh      ---755 3 1 /bin/sh
+6       ken     d--755 6 1
+7               $
+8       b0      b--644 3 1 0 0
+9       c0      c--644 3 1 0 0
+10      fifo    p--644 3 1
+11      slink   l--644 3 1 /a/symbolic/link
+12      :  This is a comment line
+13      $
+14      $\f1
+.in -5
+.fi
+.IP
+Line 1 is a dummy string.
+(It was formerly the bootfilename.)
+It is present for backward
+compatibility; boot blocks are not used on SGI systems.
+.IP
+Note that some string of characters must be present as the first line of
+the proto file to cause it to be parsed correctly; the value
+of this string is immaterial since it is ignored.
+.IP
+Line 2 contains two numeric values (formerly the numbers of blocks and inodes).
+These are also merely for backward compatibility: two numeric values must
+appear at this point for the proto file to be correctly parsed,
+but their values are immaterial since they are ignored.
+.IP
+Lines 3-11 tell
+.I mkfs.xfs
+about files and directories to
+be included in this filesystem.
+Line 3 specifies the root directory.
+Lines 4-6 and 8-10 specify other directories and files.
+Note the special symbolic link syntax on line 11.
+.IP
+The
+.B $
+on line 7 tells
+.I mkfs.xfs
+to end the branch of the filesystem it is on, and continue
+from the next higher directory.
+It must be the last character
+on a line.
+The colon
+on line 12 introduces a comment; all characters up until the
+following newline are ignored.
+Note that this means you cannot
+have a file in a prototype file whose name contains a colon.
+The
+.B $
+on lines 13 and 14 end the process, since no additional
+specifications follow.
+.IP
+File specifications give the mode,
+the user ID,
+the group ID,
+and the initial contents of the file.
+Valid syntax for the contents field
+depends on the first character of the mode.
+.IP
+The mode for a file is specified by a 6-character string.
+The first character
+specifies the type of the file.
+The character range is
+.B \-bcdpl
+to specify regular, block special,
+character special, directory files, named pipes (fifos), and symbolic
+links, respectively.
+The second character of the mode
+is either
+.B u
+or
+.B \-
+to specify setuserID mode or not.
+The third is
+.B g
+or
+.B \-
+for the setgroupID mode.
+The rest of the mode
+is a three digit octal number giving the
+owner, group, and other read, write, execute
+permissions (see
+.IR chmod (1)).
+.IP
+Two decimal number
+tokens come after the mode; they specify the
+user and group IDs of the owner of the file.
+.IP
+If the file is a regular file,
+the next token of the specification can be a pathname
+from which the contents and size are copied.
+If the file is a block or character special file,
+two decimal numbers
+follow that give the major and minor device numbers.
+If the file is a symbolic link, the next token of the specification
+is used as the contents of the link.
+If the file is a directory,
+.I mkfs.xfs
+makes the entries
+.BR . ""
+and
+.B  ..
+and then
+reads a list of names and
+(recursively)
+file specifications for the entries
+in the directory.
+As noted above, the scan is terminated with the
+token
+.BR $ .
+.TP
+.B \-q
+Quiet option.
+.IP
+Normally
+.I mkfs.xfs
+prints the parameters of the filesystem
+to be constructed;
+the
+.B \-q
+flag suppresses this.
+.TP
+.B \-r
+Real-time section options.
+.IP
+These options specify the location, size, and other parameters of the
+real-time section of the filesystem.
+The valid suboptions are:
+.BI extsize= value
+and
+\f3size=\f1\f2value\f1.
+.IP
+The
+.B extsize
+suboption is used to specify the size of the blocks in the real-time
+section of the filesystem.
+This size must be a multiple of the filesystem block size.
+The minimum allowed value is the filesystem block size
+or 4 KB (whichever is larger);
+the default value is the stripe width for striped volumes or 64 KB for
+non-striped volumes;
+the maximum allowed value is 1 GB.
+The real-time extent size should be carefully chosen to match the
+parameters of the physical media used.
+.IP
+The
+.B size
+suboption is used to specify the size of the real-time section.
+This suboption is only needed if the real-time section of the
+filesystem should occupy
+less space than the size of the partition or logical volume containing the section.
+.TP
+.B \-C
+Disable overlapping partition/volume checks.
+.IP
+By default \f2mkfs.xfs\f1 checks to see if the destination partition or logical
+volume overlaps any mounted or reserved partitions in the system.  If an
+overlap or mount conflict is found, the user will be notified and prevented
+from potentially corrupting the existing data.  For systems with
+a large number of disks, this additional checking may add noticeable overhead
+to the command's execution time.  For situations where command performance is
+necessary, this switch may be used to disable the safeguards.  Due to the
+potential for user-error causing corrupted filesystems or other on-disk
+data corruption, we strongly discourage use of this switch in normal operation.
+.SH SEE ALSO
+mkfs(8).
+.SH BUGS
+With a prototype file, it is not possible to specify hard links.
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8
new file mode 100644 (file)
index 0000000..50cfc3e
--- /dev/null
@@ -0,0 +1,68 @@
+.TH xfs_admin 8
+.SH NAME
+xfs_admin \- change parameters of an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_admin\f1 [ \f3\-lu\f1 ] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] device
+\f3xfs_admin \-f\f1 [ \f3\-lu\f1 ] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] filename
+.fi
+.SH DESCRIPTION
+.I xfs_admin
+uses the
+.IR xfs_db (8)
+command to modify various parameters of a filesystem.
+.PP
+Devices that are mounted cannot be modified.
+Administrators must unmount filesystems before
+.I xfs_admin
+or
+.I xfs_db
+can convert parameters.
+A number of parameters of a mounted filesystem can be examined
+and modified using the
+.IR xfs_growfs (8)
+command.
+.SH OPTIONS
+.TP 5
+\f3\-f\f1
+Specifies that the filesystem image to be processed is stored in a
+regular file (see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+.TP 5
+\f3\-l\f1
+Print the current filesystem label.
+.TP 5
+\f3\-u\f1
+Print the current filesystem UUID (Universally Unique IDentifier).
+.TP 5
+\f3\-L\f1 \f2label\f1
+Set the filesystem label.
+XFS filesystem labels can be at most 12 characters long; if
+.I label
+is longer than 12 characters,
+.I xfs_admin
+will truncate it and print a warning message.
+The filesystem label can be cleared using the special ``\c
+.BR \-\- ''
+value for
+.IR label .
+.TP 5
+\f3\-U\f1 \f2UUID\f1
+Set the UUID of the filesystem.
+A sample UUID looks like this: "c1b9d5a2-f162-11cf-9ece-0020afc76f16".
+The uuid may also be
+.IR null ,
+which will set the filesystem UUID to the null UUID.
+The uuid may also be
+.IR generate ,
+which will generate a new UUID for the filesystem.
+.PP
+The
+.IR mount (8)
+manual entry describes how to mount a filesystem using its label or UUID,
+rather than its block special device name.
+.SH SEE ALSO
+mkfs.xfs(8),
+mount(8),
+xfs_db(8),
+xfs_growfs(8),
+xfs(5).
diff --git a/man/man8/xfs_bmap.8 b/man/man8/xfs_bmap.8
new file mode 100644 (file)
index 0000000..0d08ec3
--- /dev/null
@@ -0,0 +1,54 @@
+.TH xfs_bmap 8
+.SH NAME
+xfs_bmap \- print block mapping for an XFS file
+.SH SYNOPSIS
+.nf
+\f3xfs_bmap\f1 [ \f3\-a\f1 ] [ \f3\-l\f1 ] [ \f3\-d\f1 ] [ \f3\-n \f2nnn\f1 ] file ...
+.fi
+.SH DESCRIPTION
+.I xfs_bmap
+prints the map of disk blocks used by files in an XFS filesystem.
+The map lists each \f2extent\fP used by the file, as well as regions
+in the file that do not have any corresponding blocks (\f2hole\f1s).
+Each line of the listings takes the following form:
+
+.Ex
+\f2extent\f1\f7: [\f1\f2startoffset\f1\f7..\f1\f2endoffset\f1\f7]: \c
+\f1\f2startblock\f1\f7..\f1\f2endblock\f1
+.Ee
+
+Holes are marked by replacing the \f2startblock..endblock\f1 with \f2hole\fP.
+All the file offsets and disk blocks are in units of 512-byte blocks,
+no matter what the filesystem's block size is.
+.PP
+If portions of the file have been migrated offline by
+a DMAPI application, a DMAPI read event will be generated to
+bring those portions back online before the disk block map is
+printed.  However if the \f3-d\f1 option is used, no DMAPI read event
+will be generated for a DMAPI file and offline portions will be reported as holes.
+.PP
+If the \f3-l\f1 option is used, then
+
+.Ex
+\f1\f2<nblocks>\f1\f7 \f1\f2blocks\f1\f7
+.Ee
+
+will be appended to each line.  \f2Nblocks\f1 is the length
+of the extent described on the line in units of 512-byte blocks.
+.PP
+If the \f3\-a\f1 option is given, information about the file's
+attribute fork is printed instead of the default data fork.
+.PP
+If the \f3\-n \f2nnn\f1 option is given, \f3xfs_bmap\f1 obtains the extent
+list of the file in groups of \f2nnn\f1 extents.
+In the absence of \f3\-n\f1, \f3xfs_bmap\f1 queries the system for
+the number of extents in the file and uses that value to compute 
+the group size.
+.SH DIAGNOSTICS
+.TP 10
+\f7fcntl(F_GETBMAPX) \f1\f2filename\f1\f7: Invalid argument\f1
+The file \f2filename\f1 is not in an XFS filesystem.
+.SH SEE ALSO
+fcntl(2),
+lvm(8).
+
diff --git a/man/man8/xfs_check.8 b/man/man8/xfs_check.8
new file mode 100644 (file)
index 0000000..96480f4
--- /dev/null
@@ -0,0 +1,177 @@
+.TH xfs_check 8
+.SH NAME
+xfs_check \- check XFS filesystem consistency
+.SH SYNOPSIS
+.nf
+\f3xfs_check\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c
+[ \f3\-s\f1 ] [ \f3\-v\f1 ] xfs_special
+.sp .8v
+\f3xfs_check\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c
+[ \f3\-s\f1 ] [ \f3\-v\f1 ] file
+.fi
+.SH DESCRIPTION
+.I xfs_check
+checks whether an XFS filesystem is consistent.
+It is normally run only when there is reason to believe that the
+filesystem has a consistency problem.
+The filesystem to be checked is specified by the
+.I xfs_special
+argument, which should be the disk or volume device for the filesystem.
+Filesystems stored in files can also be checked, using the \f3\-f\f1 flag.
+The filesystem should normally be unmounted or read-only
+during the execution of
+.IR xfs_check .
+Otherwise, spurious problems are reported.
+.PP
+The options to \f2xfs_check\f1 are:
+.TP 9
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file.
+.TP
+.B \-s
+Specifies that only serious errors should be reported.
+Serious errors are those that make it impossible to find major data
+structures in the filesystem.
+This option can be used to cut down the
+amount of output when there is a serious problem, when the output might make it
+difficult to see what the real problem is.
+.TP
+.B \-v
+Specifies verbose output; it is impossibly long for a
+reasonably-sized filesystem.
+This option is intended for internal use only.
+.TP
+.BI \-i " ino"
+Specifies verbose behavior for a
+specific inode.
+For instance, it can be used to locate all the blocks
+associated with a given inode.
+.TP
+.BI \-b " bno"
+Specifies verbose behavior for a specific filesystem block.
+For instance, it can be used to determine what a specific block
+is used for.
+The block number is a "file system block number".
+Conversion between disk addresses (i.e. addresses reported by
+.IR xfs_bmap )
+and file system blocks may be accomplished using
+.IR xfs_db 's
+.B convert
+command.
+.PP
+Any non-verbose output from
+.I xfs_check
+means that the filesystem has an inconsistency.
+The filesystem can be repaired using either
+.IR xfs_repair (8)
+to fix the filesystem in place,
+or by using
+.IR xfsdump (8)
+and
+.IR mkfs.xfs (8)
+to dump the filesystem,
+make a new filesystem,
+then use
+.IR xfsrestore (8)
+to restore the data onto the new filesystem.
+Note that xfsdump may fail on a corrupt filesystem.
+However, if the filesystem is mountable, xfsdump can
+be used to try and save important data before
+repairing the filesystem with xfs_repair.
+If the filesystem is not mountable though, xfs_repair is
+the only viable option.
+.SH DIAGNOSTICS
+Under one circumstance,
+.I xfs_check
+unfortunately might dump core
+rather than produce useful output.
+If the filesystem is completely corrupt, a core dump might
+be produced instead of the message
+.Ex
+\f2xxx\f1\f7 is not a valid filesystem\f1
+.Ee
+.PP
+If the filesystem is very large (has many files) then
+.I xfs_check
+might run out of memory.
+In this case the message
+.Ex
+out of memory
+.Ee
+is printed.
+.PP
+The following is a description of the most likely problems and the associated
+messages.
+Most of the diagnostics produced are only meaningful with an understanding
+of the structure of the filesystem.
+.TP
+\f7agf_freeblks \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The freeblocks count in the allocation group header for allocation group
+.I a
+doesn't match the number of blocks counted free.
+.TP
+\f7agf_longest \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The longest free extent in the allocation group header for allocation group
+.I a
+doesn't match the longest free extent found in the allocation group.
+.TP
+\f7agi_count \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The allocated inode count in the allocation group header for allocation group
+.I a
+doesn't match the number of inodes counted in the allocation group.
+.TP
+\f7agi_freecount \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1
+The free inode count in the allocation group header for allocation group
+.I a
+doesn't match the number of inodes counted free in the allocation group.
+.TP
+\f7block \f1\f2a/b\f1\f7 expected inum 0 got \f1\f2i\f1
+The block number is specified as a pair
+(allocation group number, block in the allocation group).
+The block is used multiple times (shared), between multiple inodes.
+This message usually follows a message of the next type.
+.TP
+\f7block \f1\f2a/b\f1\f7 expected type unknown got \f1\f2y\f1
+The block is used multiple times (shared).
+.TP
+\f7block \f1\f2a/b\f1\f7 type unknown not expected\f1
+The block is unaccounted for (not in the freelist and not in use).
+.TP
+\f7link count mismatch for inode \f1\f2nnn\f1\f7 (name \f1\f2xxx\f1\f7), nlink \f1\f2m\f1\f7, counted \f1\f2n\f1
+The inode has a bad link count (number of references in directories).
+.TP
+\f7rtblock \f1\f2b\f1\f7 expected inum 0 got \f1\f2i\f1
+The block is used multiple times (shared), between multiple inodes.
+This message usually follows a message of the next type.
+.TP
+\f7rtblock \f1\f2b\f1\f7 expected type unknown got \f1\f2y\f1
+The real-time block is used multiple times (shared).
+.TP
+\f7rtblock \f1\f2b\f1\f7 type unknown not expected\f1
+The real-time block is unaccounted for (not in the freelist and not in use).
+.TP
+\f7sb_fdblocks \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free data blocks recorded
+in the superblock doesn't match the number counted free in the filesystem.
+.TP
+\f7sb_frextents \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free real-time extents recorded
+in the superblock doesn't match the number counted free in the filesystem.
+.TP
+\f7sb_icount \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of allocated inodes recorded
+in the superblock doesn't match the number allocated in the filesystem.
+.TP
+\f7sb_ifree \f1\f2n\f1\f7, counted \f1\f2m\f1
+The number of free inodes recorded
+in the superblock doesn't match the number free in the filesystem.
+.SH SEE ALSO
+mkfs.xfs(8),
+xfsdump(8),
+xfsrestore(8),
+xfs_ncheck(8),
+xfs_repair(8),
+xfs(5).
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
new file mode 100644 (file)
index 0000000..82c2ad9
--- /dev/null
@@ -0,0 +1,1187 @@
+.TH xfs_db 8
+.SH NAME
+xfs_db \- debug an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_db\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] xfs_special
+.sp .8v
+\f3xfs_db\f1 \f3\-f\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] file
+.fi
+.SH DESCRIPTION
+\f2xfs_db\f1 is used to examine an XFS filesystem.
+Under rare circumstances it can also be used to modify an XFS filesystem,
+but that task is normally left to \f2xfs_repair\f1(8) or to
+scripts such as \f2xfs_chver\f1 that run \f2xfs_db\f1.
+.PP
+The options to \f2xfs_db\f1 are:
+.TP 10
+\f3\-c\f1 \f2cmd\f1
+\f2xfs_db\f1 commands may be run interactively (the default)
+or as arguments on the command line.
+Multiple \f3\-c\f1 arguments may be given.
+The commands are run in the sequence given, then the program exits.
+This is the mechanism used to implement \f2xfs_check\f1(8).
+.TP
+\f3\-f\f1
+Specifies that the filesystem image to be processed is stored in a 
+regular file
+(see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file with \f2xfs_copy\f1(8).
+.TP
+\f3\-i\f1
+Allows execution on a mounted filesystem, provided it is mounted read-only.
+Useful for shell scripts such as \f2xfs_check\f1(8), which must only
+operate on filesystems in a guaranteed consistent state
+(either unmounted or mounted read-only).
+These semantics are slightly different to that of the \f3\-r\f1 option.
+.TP
+\f3\-p\f1 \f2prog\f1
+Set the program name for prompts and some error messages;
+the default value is \f2xfs_db\f1.
+.TP
+\f3\-r\f1
+Open \f2file\f1 or \f2xfs_special\f1 read-only.
+This option is required if \f2xfs_special\f1 is a mounted filesystem.
+It is only necessary to omit this flag if a command that changes data
+(\f3write\f1, \f3blocktrash\f1) is to be used.
+.TP
+\f3\-x\f1
+Specifies expert mode.
+This enables the \f3write\f1 command.
+.SH CONCEPTS
+\f2xfs_db\f1 commands can be broken up into two classes.
+Most commands are for the navigation and display of data structures in
+the filesystem.
+Other commands are for scanning the filesystem in some way.
+.PP
+Commands which are used to navigate the filesystem structure take arguments
+which reflect the names of filesystem structure fields.
+There can be multiple field names separated by dots when the underlying
+structures are nested, as in C.
+The field names can be indexed (as an array index)
+if the underlying field is an array.
+The array indices can be specified as a range, two numbers separated by a dash.
+.PP
+\f2xfs_db\f1 maintains a current address in the filesystem.
+The granularity of the address is a filesystem structure.
+This can be a filesystem block,
+an inode or quota (smaller than a filesystem block),
+or a directory block (could be larger than a filesystem block).
+There are a variety of commands to set the current address.
+Associated with the current address is the current data type,
+which is the structural type of this data.
+Commands which follow the structure of the filesystem always set the type
+as well as the address.
+Commands which examine pieces of an individual file (inode) need the current
+inode to be set, this is done with the \f3inode\f1 command.
+.PP
+The current address/type information is actually maintained in a
+stack that can be explicitly manipulated with the
+\f3push\f1, \f3pop\f1, and \f3stack\f1 commands.
+This allows for easy examination of a nested filesystem structure.
+Also, the last several locations visited are stored in a ring buffer
+which can be manipulated with the
+\f3forward\f1, \f3back\f1, and \f3ring\f1 commands.
+.PP
+XFS filesystems are divided into a small number of allocation groups.
+\f2xfs_db\f1 maintains a notion of the current allocation group which is
+manipulated by some commands.
+The initial allocation group is 0.
+.SH COMMANDS
+.PP
+Many commands have extensive online help.
+Use the \f3help\f1 command for more details on any command.
+.TP 10
+\f3a\f1
+See the \f3addr\f1 command.
+.TP
+\f3ablock\f1 \f2filoff\f1
+Set current address to the offset \f2filoff\f1 (a filesystem block number)
+in the attribute area of the current inode.
+.TP
+\f3addr\f1 [ \f2field-expression\f1 ]
+Set current address to the value of the \f2field-expression\f1.
+This is used to ``follow'' a reference in one structure to the object
+being referred to.
+If no argument is given the current address is printed.
+.TP
+\f3agf\f1 [ \f2agno\f1 ]
+Set current address to the AGF block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3agfl\f1 [ \f2agno\f1 ]
+Set current address to the AGFL block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3agi\f1 [ \f2agno\f1 ]
+Set current address to the AGI block for allocation group \f2agno\f1.
+If no argument is given use the current allocation group.
+.TP
+\f3b\f1
+See the \f3back\f1 command.
+.TP
+\f3back\f1
+Move to the previous location in the position ring.
+.TP
+\f3blockfree\f1
+Free block usage information collected by the last execution of the
+\f3blockget\f1 command.
+This must be done before another \f3blockget\f1 command can be given,
+presumably with different arguments than the previous one.
+.TP
+\f3blockget\f1 [ \f3\-npsv\f1 ] [ \f3\-b\f1 \f2bno\f1 ] ... [ \f3\-i\f1 \f2ino\f1 ] ...
+Get block usage and check filesystem consistency.
+The information is saved for use by a subsequent
+\f3blockuse\f1, \f3ncheck\f1, or \f3blocktrash\f1 command.
+See \f2xfs_check\f1(8) for more information.
+.br
+The \f3\-b\f1 option is used to specify filesystem block numbers
+about which verbose information should be printed.
+.br
+The \f3\-i\f1 option is used to specify inode numbers about which
+verbose information should be printed.
+.br
+The \f3\-n\f1 option is used to save pathnames for inodes visited,
+this is used to support the \f2xfs_ncheck\f1(8) command.
+It also means that pathnames will be printed for inodes that have problems.
+This option uses a lot of memory so is not enabled by default.
+.br
+The \f3\-p\f1 option causes error messages to be prefixed with the
+filesystem name being processed.
+This is useful if several copies of \f2xfs_db\f1 are run in parallel.
+.br
+The \f3\-s\f1 option restricts output to severe errors only.
+This is useful if the output is too long otherwise.
+.br
+The \f3\-v\f1 option enables verbose output.
+Messages will be printed for every block and inode processed.
+.TP
+\f3blocktrash\f1 [ \f3\-n\f1 \f2c\f1 ] [ \f3\-x\f1 \f2a\f1 ] [ \f3\-y\f1 \f2b\f1 ] [ \f3\-s\f1 \f2s\f1 ] [ \f3\-0123\f1 ] [ \f3\-t\f1 \f2t\f1 ] ...
+Trash randomly selected filesystem metadata blocks.
+Trashing occurs to randomly selected bits in the chosen blocks.
+This command is available only in debugging versions of \f2xfs_db\f1.
+It is useful for testing \f2xfs_repair\f1(8) and \f2xfs_check\f1(8).
+.br
+The \f3\-0\f1, \f3\-1\f1, \f3\-2\f1, and \f3\-3\f1 options (mutually exclusive)
+set the operating mode for \f3blocktrash\f1.
+In \f3\-0\f1 mode, changed bits are cleared.
+In \f3\-1\f1 mode, changed bits are set.
+In \f3\-2\f1 mode, changed bits are inverted.
+In \f3\-3\f1 mode, changed bits are randomized.
+.br
+The \f3\-n\f1 option supplies the count of block-trashings to perform
+(default 1).
+.br
+The \f3\-s\f1 option supplies a seed to the random processing.
+.br
+The \f3\-t\f1 option gives a type of blocks to be selected
+for trashing.
+Multiple \f3\-t\f1 options may be given.
+If no \f3\-t\f1 options are given then all metadata types can be trashed.
+.br
+The \f3\-x\f1 option sets the minimum size of bit range to be trashed.
+The default value is 1.
+.br
+The \f3\-y\f1 option sets the maximum size of bit range to be trashed.
+The default value is 1024.
+.TP
+\f3blockuse\f1 [ \f3\-n\f1 ] [ \f3\-c\f1 \f2blockcount\f1 ]
+Print usage for current filesystem block(s).
+For each block, the type and (if any) inode are printed.
+.br
+The \f3\-c\f1 option specifies a count of blocks to process.
+The default value is 1 (the current block only).
+.br
+The \f3\-n\f1 option specifies that file names should be printed.
+The prior \f3blockget\f1 command must have also specified the \f3\-n\f1 option.
+.TP
+\f3bmap\f1 [ \f3\-a\f1 ] [ \f3\-d\f1 ] [ \f2block\f1 [ \f2len\f1 ] ]
+Show the block map for the current inode.
+The map display can be restricted to an area of the file with the
+\f2block\f1 and \f2len\f1 arguments.
+If \f2block\f1 is given and \f2len\f1 is omitted then 1 is assumed for len.
+.br
+The \f3\-a\f1 and \f3\-d\f1 options are used to select the attribute or data
+area of the inode, if neither option is given then both areas are shown.
+.TP
+\f3check\f1
+See the \f3blockget\f1 command.
+.TP
+\f3convert\f1 \f2type\f1 \f2number\f1 [ \f2type\f1 \f2number\f1 ] ... \f2type\f1
+Convert from one address form to another.
+The known \f2type\f1s, with alternate names, are:
+\f3agblock\f1 or \f3agbno\f1 (filesystem block within an allocation group),
+\f3agino\f1 or \f3aginode\f1 (inode number within an allocation group),
+\f3agnumber\f1 or \f3agno\f1 (allocation group number),
+\f3bboff\f1 or \f3daddroff\f1 (byte offset in a \f3daddr\f1),
+\f3blkoff\f1 or \f3fsboff\f1 or \f3agboff\f1 (byte offset in a \f3agblock\f1
+or \f3fsblock\f1),
+\f3byte\f1 or \f3fsbyte\f1 (byte address in filesystem),
+\f3daddr\f1 or \f3bb\f1 (disk address, 512-byte blocks),
+\f3fsblock\f1 or \f3fsb\f1 or \f3fsbno\f1 (filesystem block, see the
+\f3fsblock\f1 command),
+\f3ino\f1 or \f3inode\f1 (inode number),
+\f3inoidx\f1 or \f3offset\f1 (index of inode in filesystem block),
+and \f3inooff\f1 or \f3inodeoff\f1 (byte offset in inode).
+Only conversions that ``make sense'' are allowed.
+The compound form (with more than three arguments) is useful for
+conversions such as
+\f3convert\f1 \f3agno\f1 \f2ag\f1 \f3agbno\f1 \f2agb\f1 \f3fsblock\f1.
+.TP
+\f3daddr\f1 [ \f2d\f1 ]
+Set current address to the daddr (512 byte block) given by \f2d\f1.
+If no value for \f2d\f1 is given the current address is printed,
+expressed as a daddr.
+The type is set to \f3data\f1 (uninterpreted).
+.TP
+\f3dblock\f1 \f2filoff\f1
+Set current address to the offset \f2filoff\f1 (a filesystem block number)
+in the data area of the current inode.
+.TP
+\f3debug\f1 [ \f2flagbits\f1 ]
+Set debug option bits.
+These are used for debugging \f2xfs_db\f1.
+If no value is given for \f2flagbits\f1, print the current debug option bits.
+These are for the use of the implementor.
+.TP
+\f3dquot\f1 [ \f2projectid_or_userid\f1 ]
+Set current address to a project or user quota block.
+.TP
+\f3echo\f1 [ \f2arg\f1 ] ...
+Echo the arguments to the output.
+.TP
+\f3f\f1
+See the \f3forward\f1 command.
+.TP
+\f3forward\f1
+Move forward to the next entry in the position ring.
+.TP
+\f3frag\f1 [ \f3\-adflqRrv\f1 ]
+Get file fragmentation data.
+This prints information about fragmentation of file data in the filesystem
+(as opposed to fragmentation of freespace,
+for which see the \f3freesp\f1 command).
+Every file in the filesystem is examined to see how far from ideal
+its extent mappings are.
+A summary is printed giving the totals.
+.br
+The \f3\-v\f1 option sets verbosity,
+every inode has information printed for it.
+The remaining options select which inodes and extents are examined.
+If no options are given then all are assumed set,
+otherwise just those given are enabled.
+.br
+The \f3\-a\f1 option enables processing of attribute data.
+.br
+The \f3\-d\f1 option enables processing of directory data.
+.br
+The \f3\-f\f1 option enables processing of regular file data.
+.br
+The \f3\-l\f1 option enables processing of symbolic link data.
+.br
+The \f3\-q\f1 option enables processing of quota file data.
+.br
+The \f3\-R\f1 option enables processing of realtime control file data.
+.br
+The \f3\-r\f1 option enables processing of realtime file data.
+.TP
+\f3freesp\f1 [ \f3\-bcds\f1 ] [ \f3\-a\f1 \f2a\f1 ] ... [ \f3\-e\f1 \f2i\f1 ] [ \f3\-h\f1 \f2h1\f1 ] ... [ \f3\-m\f1 \f2m\f1 ]
+Summarize free space for the filesystem.
+The free blocks are examined and totalled,
+and displayed in the form of a histogram,
+with a count of extents in each range of free extent sizes.
+.br
+The \f3\-a\f1 \f2a\f1 option adds \f2a\f1 to the list of
+allocation groups to be processed.
+If no \f3\-a\f1 options are given then all allocation groups are processed.
+.br
+The \f3\-b\f1 option specifies that the histogram buckets are binary-sized,
+with the starting sizes being the powers of 2.
+.br
+The \f3\-c\f1 option specifies that \f3freesp\f1 will search the
+by-size (cnt) space Btree instead of the default by-block (bno) space Btree.
+.br
+The \f3\-d\f1 option specifies that every free extent will be displayed.
+.br
+The \f3\-e\f1 \f2i\f1 option specifies that the histogram buckets are
+equal-sized, with the size specified as \f2i\f1.
+.br
+The \f3\-h\f1 \f2h1\f1 option specifies a starting block number
+for a histogram bucket as \f2h1\f1.
+Multiple \f3\-h\f1 options are given to specify the complete set of buckets.
+.br
+The \f3\-m\f1 \f2m\f1 option specifies that the histogram
+starting block numbers are powers of \f2m\f1.
+This is the general case of \f3\-b\f1.
+.br
+The \f3\-s\f1 option specifies that a final summary of total free extents,
+free blocks, and the average free extent size is printed.
+.TP
+\f3fsb\f1
+See the \f3fsblock\f1 command.
+.TP
+\f3fsblock\f1 [ \f2fsb\f1 ]
+Set current address to the fsblock value given by \f2fsb\f1.
+If no value for \f2fsb\f1 is given the current address is printed,
+expressed as an fsb.
+The type is set to \f3data\f1 (uninterpreted).
+XFS filesystem block numbers are computed as
+((\f2agno\f1 << \f2agshift\f1) | \f2agblock\f1)
+where \f2agshift\f1 depends on the size of an allocation group.
+Use the \f3convert\f1 command to convert to and from this form.
+Block numbers given for file blocks
+(for instance from the \f3bmap\f1 command)
+are in this form.
+.TP
+\f3hash\f1 \f2string\f1
+Prints the hash value of \f2string\f1 using the hash function of the XFS
+directory and attribute implementation.
+.TP
+\f3help\f1 [ \f2command\f1 ]
+Print help for one or all commands.
+.TP
+\f3inode\f1 [ \f2inode#\f1 ]
+Set the current inode number.
+If no \f2inode#\f1 is given, print the current inode number.
+.TP
+\f3log\f1 [ \f3stop\f1 | \f3start\f1 \f2filename\f1 ]
+Start logging output to \f2filename\f1, stop logging,
+or print the current logging status.
+.TP
+\f3ncheck\f1 [ \f3\-s\f1 ] [ \f3\-i\f1 \f2ino\f1 ] ...
+Print name-inode pairs.
+A \f3blockget \-n\f1 command must be run first to gather the information.
+.br
+The \f3\-i\f1 option specifies an inode number to be printed.
+If no \f3\-i\f1 options are given then all inodes are printed.
+.br
+The \f3\-s\f1 option specifies that only setuid and setgid files are printed.
+.TP
+\f3p\f1
+See the \f3print\f1 command.
+.TP
+\f3pop\f1
+Pop location from the stack.
+.TP
+\f3print\f1 [ \f2field-expression\f1 ] ...
+Print field values.
+If no argument is given, print all fields in the current structure.
+.TP
+\f3push\f1 [ \f2command\f1 ]
+Push location to the stack.
+If \f2command\f1 is supplied,
+set the current location to the results of \f2command\f1
+after pushing the old location.
+.TP
+\f3q\f1
+See the \f3quit\f1 command.
+.TP
+\f3quit\f1
+Exit \f2xfs_db\f1.
+.TP
+\f3ring\f1 [ \f2index\f1 ]
+Show position ring (if no \f2index\f1 argument is given),
+or move to a specific entry in the position ring given by \f2index\f1.
+.TP
+\f3sb\f1 [ \f2agno\f1 ]
+Set current address to SB header in allocation group \f2agno\f1.
+If no \f2agno\f1 is given use the current allocation group number.
+.TP
+\f3source\f1 \f2source-file\f1
+Process commands from \f2source-file\f1.
+\f3source\f1 commands can be nested.
+.TP
+\f3stack\f1
+View the location stack.
+.TP
+\f3type\f1 [ \f2type\f1 ]
+Set the current data type to \f2type\f1.
+If no argument is given, show the current data type.
+The possible data types are:
+\f3agf\f1, \f3agfl\f1, \f3agi\f1, \f3attr\f1, \f3bmapbta\f1, \f3bmapbtd\f1,
+\f3bnobt\f1, \f3cntbt\f1, \f3data\f1, \f3dir\f1, \f3dir2\f1, \f3dqblk\f1,
+\f3inobt\f1, \f3inode\f1, \f3log\f1, \f3rtbitmap\f1, \f3rtsummary\f1,
+\f3sb\f1, and \f3symlink\f1.
+See the TYPES section below for more information on these data types.
+.TP
+\f3write\f1 [ \f2field or value\f1 ] ...
+Write a value to disk.
+Specific fields can be set in structures (struct mode),
+or a block can be set to data values (data mode),
+or a block can be set to string values (string mode, for symlink blocks).
+The operation happens immediately: there is no buffering.
+.br
+Struct mode is in effect when the current type is structural,
+i.e. not data.
+For struct mode, the syntax is ``\f3write\f1 \f2field\f1 \f2value\f1''.
+.br
+Data mode is in effect when the current type is data.
+In this case the contents of the block can be shifted or rotated left or right,
+or filled with a sequence, a constant value, or a random value.
+In this mode \f3write\f1 with no arguments gives more information on
+the allowed commands.
+.SH TYPES
+This section gives the fields in each structure type and their meanings.
+Note that some types of block cover multiple actual structures,
+for instance directory blocks.
+.TP 10
+\f3agf\f1
+The AGF block is the header for block allocation information;
+it is in the second 512-byte block of each allocation group.
+The following fields are defined:
+.br
+\f3magicnum\f1: AGF block magic number, 0x58414746 ('XAGF')
+.br
+\f3versionnum\f1: version number, currently 1
+.br
+\f3seqno\f1: sequence number starting from 0
+.br
+\f3length\f1: size in filesystem blocks of the allocation group.
+All allocation groups except the last one of the filesystem have
+the superblock's \f3agblocks\f1 value here
+.br
+\f3bnoroot\f1: block number of the root of the Btree holding free space
+information sorted by block number
+.br
+\f3cntroot\f1: block number of the root of the Btree holding free space
+information sorted by block count
+.br
+\f3bnolevel\f1: number of levels in the by-block-number Btree
+.br
+\f3cntlevel\f1: number of levels in the by-block-count Btree
+.br
+\f3flfirst\f1: index into the AGFL block of the first active entry
+.br
+\f3fllast\f1: index into the AGFL block of the last active entry
+.br
+\f3flcount\f1: count of active entries in the AGFL block
+.br
+\f3freeblks\f1: count of blocks represented in the freespace Btrees
+.br
+\f3longest\f1: longest free space represented in the freespace Btrees
+.TP
+\f3agfl\f1
+The AGFL block contains block numbers for use of the block allocator;
+it is in the fourth 512-byte block of each allocation group.
+Each entry in the active list is a block number within the allocation group
+that can be used for any purpose if space runs low.
+The AGF block fields \f3flfirst\f1, \f3fllast\f1, and \f3flcount\f1
+designate which entries are currently active.
+Entry space is allocated in a circular manner within the AGFL block.
+Fields defined:
+.br
+\f3bno\f1: array of all block numbers.
+Even those which are not active are printed
+.TP
+\f3agi\f1
+The AGI block is the header for inode allocation information;
+it is in the third 512-byte block of each allocation group.
+Fields defined:
+.br
+\f3magicnum\f1: AGI block magic number, 0x58414749 ('XAGI')
+.br
+\f3versionnum\f1: version number, currently 1
+.br
+\f3seqno\f1: sequence number starting from 0
+.br
+\f3length\f1: size in filesystem blocks of the allocation group
+.br
+\f3count\f1: count of inodes allocated
+.br
+\f3root\f1: block number of the root of the Btree holding inode allocation
+information
+.br
+\f3level\f1: number of levels in the inode allocation Btree
+.br
+\f3freecount\f1: count of allocated inodes that are not in use
+.br
+\f3newino\f1: last inode number allocated
+.br
+\f3dirino\f1: unused
+.br
+\f3unlinked\f1: an array of inode numbers within the allocation group.
+The entries in the AGI block are the heads of lists which run through the
+inode \f3next_unlinked\f1 field.
+These inodes are to be unlinked the next time the filesystem is mounted
+.TP
+\f3attr\f1
+An attribute fork is organized as a Btree with the actual data
+embedded in the leaf blocks.
+The root of the Btree is found in block 0 of the fork.
+The index (sort order) of the Btree is the hash value of the attribute name.
+All the blocks contain a \f3blkinfo\f1 structure at the beginning,
+see type \f3dir\f1 for a description.
+Nonleaf blocks are identical in format to those for version 1 and
+version 2 directories, see type \f3dir\f1 for a description.
+Leaf blocks can refer to ``local'' or ``remote'' attribute values.
+Local values are stored directly in the leaf block.
+Remote values are stored in an independent block in the attribute fork
+(with no structure).
+Leaf blocks contain the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfbee),
+a \f3count\f1 of active entries,
+\f3usedbytes\f1 total bytes of names and values,
+the \f3firstused\f1 byte in the name area,
+\f3holes\f1 set if the block needs compaction,
+and array \f3freemap\f1 as for \f3dir\f1 leaf blocks
+.br
+\f3entries\f1: array of structures containing
+a \f3hashval\f1,
+\f3nameidx\f1 (index into the block of the name),
+and flags \f3incomplete\f1,
+\f3root\f1,
+and \f3local\f1
+.br
+\f3nvlist\f1: array of structures describing the attribute names and values.
+Fields always present:
+\f3valuelen\f1 (length of value in bytes),
+\f3namelen\f1,
+and \f3name\f1.
+Fields present for local values:
+\f3value\f1 (value string).
+Fields present for remote values:
+\f3valueblk\f1 (fork block number of the block containing the value).
+.TP
+\f3bmapbt\f1
+Files with many extents in their data or attribute fork will have the
+extents described by the contents of a Btree for that fork,
+instead of being stored directly in the inode.
+Each bmap Btree starts with a root block contained within the inode.
+The other levels of the Btree are stored in filesystem blocks.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block contains the following fields:
+.br
+\f3magic\f1: bmap Btree block magic number, 0x424d4150 ('BMAP')
+.br
+\f3level\f1: level of this block above the leaf level
+.br
+\f3numrecs\f1: number of records or keys in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of extent records.
+Each record contains
+\f3startoff\f1,
+\f3startblock\f1,
+\f3blockcount\f1,
+and \f3extentflag\f1 (1 if the extent is unwritten)
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first key value of each block in the level below this one.
+Each record contains \f3startoff\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a filesystem block number to the next level in the Btree
+.TP
+\f3bnobt\f1
+There is one set of filesystem blocks forming the by-block-number allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3bnoroot\f1 field in the
+corresponding AGF block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: BNOBT block magic number, 0x41425442 ('ABTB')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of freespace records.
+Each record contains
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3cntbt\f1
+There is one set of filesystem blocks forming the by-block-count allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3cntroot\f1 field in the
+corresponding AGF block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: CNTBT block magic number, 0x41425443 ('ABTC')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of freespace records.
+Each record contains 
+\f3startblock\f1
+and \f3blockcount\f1
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3blockcount\f1
+and \f3startblock\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3data\f1
+User file blocks, and other blocks whose type is unknown,
+have this type for display purposes in \f2xfs_db\f1.
+The block data is displayed in hexadecimal format.
+.TP
+\f3dir\f1
+A version 1 directory is organized as a Btree with the directory data
+embedded in the leaf blocks.
+The root of the Btree is found in block 0 of the file.
+The index (sort order) of the Btree is the hash value of the entry name.
+All the blocks contain a \f3blkinfo\f1 structure at the beginning
+with the following fields:
+.br
+\f3forw\f1: next sibling block
+.br
+\f3back\f1: previous sibling block
+.br
+\f3magic\f1: magic number for this block type
+.sp
+The nonleaf (node) blocks have the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfebe),
+the \f3count\f1 of active entries,
+and the \f3level\f1 of this block above the leaves
+.br
+\f3btree\f1: array of entries containing
+\f3hashval\f1 and
+\f3before\f1 fields.
+The \f3before\f1 value is a block number within the directory file to the
+child block,
+the \f3hashval\f1 is the last hash value in that block
+.sp
+The leaf blocks have the following fields:
+.br
+\f3hdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfeeb),
+the \f3count\f1 of active entries,
+\f3namebytes\f1 (total name string bytes),
+\f3holes\f1 flag (block needs compaction),
+and \f3freemap\f1 (array of \f3base\f1, \f3size\f1 entries for free regions)
+.br
+\f3entries\f1: array of structures containing
+\f3hashval\f1,
+\f3nameidx\f1 (byte index into the block of the name string),
+and \f3namelen\f1
+.br
+\f3namelist\f1: array of structures containing
+\f3inumber\f1
+and \f3name\f1
+.TP
+\f3dir2\f1
+A version 2 directory has four kinds of blocks.
+Data blocks start at offset 0 in the file.
+There are two kinds of data blocks: single-block directories have
+the leaf information embedded at the end of the block, data blocks
+in multi-block directories do not.
+Node and leaf blocks start at offset 32GB (with either a single
+leaf block or the root node block).
+Freespace blocks start at offset 64GB.
+The node and leaf blocks form a Btree, with references to the data
+in the data blocks.
+The freespace blocks form an index of longest free spaces within the
+data blocks.
+.sp
+A single-block directory block contains the following fields:
+.br
+\f3bhdr\f1: header containing 
+\f3magic\f1 number 0x58443242 ('XD2B')
+and an array \f3bestfree\f1 of the longest 3 free spaces in the block
+(\f3offset\f1, \f3length\f1)
+.br
+\f3bu\f1: array of union structures.
+Each element is either an entry or a freespace.
+For entries, there are the following fields:
+\f3inumber\f1,
+\f3namelen\f1,
+\f3name\f1,
+and \f3tag\f1.
+For freespace, there are the following fields:
+\f3freetag\f1 (0xffff),
+\f3length\f1,
+and \f3tag\f1.
+The \f3tag\f1 value is the byte offset in the block of the start
+of the entry it is contained in
+.br
+\f3bleaf\f1: array of leaf entries containing
+\f3hashval\f1
+and \f3address\f1.
+The \f3address\f1 is a 64-bit word offset into the file
+.br
+\f3btail\f1: tail structure containing
+the total \f3count\f1 of leaf entries
+and \f3stale\f1 count of unused leaf entries
+.sp
+A data block contains the following fields:
+.br
+\f3dhdr\f1:
+header containing 
+\f3magic\f1 number 0x58443244 ('XD2D')
+and an array \f3bestfree\f1 of the longest 3 free spaces in the block
+(\f3offset\f1, \f3length\f1)
+.br
+\f3du\f1: array of union structures as for \f3bu\f1
+.sp
+Leaf blocks have two possible forms.
+If the Btree consists of a single leaf then the freespace information
+is in the leaf block,
+otherwise it is in separate blocks and the root of the Btree is
+a node block.
+A leaf block contains the following fields:
+.br
+\f3lhdr\f1: header containing
+a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xd2f1 for the single
+leaf case, 0xd2ff for the true Btree case),
+the total \f3count\f1 of leaf entries,
+and \f3stale\f1 count of unused leaf entries
+.br
+\f3lents\f1: leaf entries, as for \f3bleaf\f1
+.br
+\f3lbests\f1: [single leaf only]
+array of values which represent the longest freespace
+in each data block in the directory
+.br
+\f3ltail\f1: [single leaf only] tail structure containing
+\f3bestcount\f1 count of \f3lbests\f1
+.sp
+A node block is identical to that for types \f3attr\f1 and \f3dir\f1.
+.sp
+A freespace block contains the following fields:
+.br
+\f3fhdr\f1: header containing
+\f3magic\f1 number 0x58443246 ('XD2F'), 
+\f3firstdb\f1 first data block number covered by this freespace block,
+\f3nvalid\f1 number of valid entries,
+and \f3nused\f1 number of entries representing real data blocks
+.br
+\f3fbests\f1: array of values as for \f3lbests\f1
+.TP
+\f3dqblk\f1
+The quota information is stored in files referred to by the superblock
+\f3uquotino\f1 and \f3pquotino\f1 fields.
+Each filesystem block in a quota file contains a constant number of
+quota entries.
+The quota entry size is currently 136 bytes,
+so with a 4KB filesystem block size there are 30 quota entries per block.
+The \f3dquot\f1 command is used to locate these entries in the filesystem.
+The file entries are indexed by the user or project identifier
+to determine the block and offset.
+Each quota entry has the following fields:
+.br
+\f3magic\f1: magic number, 0x4451 ('DQ')
+.br
+\f3version\f1: version number, currently 1
+.br
+\f3flags\f1: flags, values include
+0x01 for user quota,
+0x02 for project quota
+.br
+\f3id\f1: user or project identifier
+.br
+\f3blk_hardlimit\f1: absolute limit on blocks in use
+.br
+\f3blk_softlimit\f1: preferred limit on blocks in use
+.br
+\f3ino_hardlimit\f1: absolute limit on inodes in use
+.br
+\f3ino_softlimit\f1: preferred limit on inodes in use
+.br
+\f3bcount\f1: blocks actually in use
+.br
+\f3icount\f1: inodes actually in use
+.br
+\f3itimer\f1: time when service will be refused if soft limit is violated
+for inodes
+.br
+\f3btimer\f1: time when service will be refused if soft limit is violated
+for blocks
+.br
+\f3iwarns\f1: number of warnings issued about inode limit violations
+.br
+\f3bwarns\f1: number of warnings issued about block limit violations
+.br
+\f3rtb_hardlimit\f1: absolute limit on realtime blocks in use
+.br
+\f3rtb_softlimit\f1: preferred limit on realtime blocks in use
+.br
+\f3rtbcount\f1: realtime blocks actually in use
+.br
+\f3rtbtimer\f1: time when service will be refused if soft limit is violated
+for realtime blocks
+.br
+\f3rtbwarns\f1: number of warnings issued about realtime block limit violations
+.TP
+\f3inobt\f1
+There is one set of filesystem blocks forming the inode allocation
+Btree for each allocation group.
+The root block of this Btree is designated by the \f3root\f1 field in the
+corresponding AGI block.
+The blocks are linked to sibling left and right blocks at each level,
+as well as by pointers from parent to child blocks.
+Each block has the following fields:
+.br
+\f3magic\f1: INOBT block magic number, 0x49414254 ('IABT')
+.br
+\f3level\f1: level number of this block, 0 is a leaf
+.br
+\f3numrecs\f1: number of data entries in the block
+.br
+\f3leftsib\f1: left (logically lower) sibling block, 0 if none
+.br
+\f3rightsib\f1: right (logically higher) sibling block, 0 if none
+.br
+\f3recs\f1: [leaf blocks only] array of inode records.
+Each record contains 
+\f3startino\f1 allocation-group relative inode number,
+\f3freecount\f1 count of free inodes in this chunk,
+and \f3free\f1 bitmap, LSB corresponds to inode 0
+.br
+\f3keys\f1: [nonleaf blocks only] array of key records.
+These are the first value of each block in the level below this one.
+Each record contains 
+\f3startino\f1
+.br
+\f3ptrs\f1: [nonleaf blocks only] array of child block pointers.
+Each pointer is a block number within the allocation group to the next level
+in the Btree
+.TP
+\f3inode\f1
+Inodes are allocated in ``chunks'' of 64 inodes each.
+Usually a chunk is multiple filesystem blocks, although there are cases
+with large filesystem blocks where a chunk is less than one block.
+The inode Btree (see \f3inobt\f1 above)
+refers to the inode numbers per allocation group.
+The inode numbers directly reflect the location of the inode block on disk.
+Use the \f3inode\f1 command to point \f2xfs_db\f1 to a specific inode.
+Each inode contains four regions:
+\f3core\f1,
+\f3next_unlinked\f1,
+\f3u\f1,
+and \f3a\f1.
+\f3core\f1 contains the fixed information.
+\f3next_unlinked\f1 is separated from the core due to
+journalling considerations, see type \f3agi\f1 field \f3unlinked\f1.
+\f3u\f1 is a union structure that is different in size and format depending
+on the type and representation of the file data (``data fork'').
+\f3a\f1 is an optional union structure to describe attribute data,
+that is different in size, format, and location depending on the presence
+and representation of attribute data, and the size of the \f3u\f1 data
+(``attribute fork'').
+\f2xfs_db\f1 automatically selects the proper union members based on
+information in the inode.
+.br
+The following are fields in the inode core:
+.br
+\f3magic\f1: inode magic number, 0x494e ('IN')
+.br
+\f3mode\f1: mode and type of file, as described in \f3chmod\f1(2),
+\f3mknod\f1(2), and \f3stat\f1(2)
+.br
+\f3version\f1: inode version, 1 or 2
+.br
+\f3format\f1: format of \f3u\f1 union data
+(0: dev_t,
+1: local file \- in-inode directory or symlink,
+2: extent list,
+3: Btree root,
+4: unique id [unused])
+.br
+\f3nlinkv1\f1: number of links to the file in a version 1 inode
+.br
+\f3nlinkv2\f1: number of links to the file in a version 2 inode
+.br
+\f3projid\f1: owner's project id (version 2 inode only)
+.br
+\f3uid\f1: owner's user id
+.br
+\f3gid\f1: owner's group id
+.br
+\f3atime\f1: time last accessed (seconds and nanoseconds)
+.br
+\f3mtime\f1: time last modified
+.br
+\f3ctime\f1: time created or inode last modified
+.br
+\f3size\f1: number of bytes in the file
+.br
+\f3nblocks\f1: total number of blocks in the file including
+indirect and attribute
+.br
+\f3extsize\f1: basic/minimum extent size for the file, used only for realtime
+.br
+\f3nextents\f1: number of extents in the data fork
+.br
+\f3naextents\f1: number of extents in the attribute fork
+.br
+\f3forkoff\f1: attribute fork offset in the inode,
+in 64-bit words from the start of \f3u\f1
+.br
+\f3aformat\f1: format of \f3a\f1 data
+(1: local attribute data,
+2: extent list,
+3: Btree root)
+.br
+\f3dmevmask\f1: DMAPI event mask
+.br
+\f3dmstate\f1: DMAPI state information
+.br
+\f3newrtbm\f1: file is the realtime bitmap and is ``new'' format
+.br
+\f3prealloc\f1: file has preallocated data space after EOF
+.br
+\f3realtime\f1: file data is in the realtime subvolume
+.br
+\f3gen\f1: inode generation number
+.sp
+The following fields are in the \f3u\f1 data fork union:
+.br
+\f3bmbt\f1: bmap Btree root.
+This looks like a \f3bmapbtd\f1 block with redundant information removed
+.br
+\f3bmx\f1: array of extent descriptors
+.br
+\f3dev\f1: dev_t for the block or character device
+.br
+\f3sfdir\f1: shortform (in-inode) version 1 directory.
+This consists of 
+a \f3hdr\f1 containing 
+the \f3parent\f1 inode number
+and a \f3count\f1 of active entries in the directory,
+followed by 
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains 
+\f3inumber\f1, 
+\f3namelen\f1,
+and \f3name\f1 string
+.br
+\f3sfdir2\f1: shortform (in-inode) version 2 directory.
+This consists of 
+a \f3hdr\f1 containing 
+a \f3count\f1 of active entries in the directory,
+an \f3i8count\f1 of entries with inumbers that don't fit in a 32-bit value,
+and the \f3parent\f1 inode number,
+followed by 
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains 
+\f3namelen\f1,
+a saved \f3offset\f1 used when the directory is converted to a larger form,
+a \f3name\f1 string,
+and the \f3inumber\f1
+.br
+\f3symlink\f1: symbolic link string value
+.sp
+The following fields are in the \f3a\f1 attribute fork union if it exists:
+.br
+\f3bmbt\f1: bmap Btree root, as above
+.br
+\f3bmx\f1: array of extent descriptors
+.br
+\f3sfattr\f1: shortform (in-inode) attribute values.
+This consists of
+a \f3hdr\f1 containing
+a \f3totsize\f1 (total size in bytes)
+and a \f3count\f1 of active entries,
+followed by
+an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries.
+Each such entry contains
+\f3namelen\f1,
+\f3valuelen\f1,
+\f3root\f1 flag,
+\f3name\f1,
+and \f3value\f1
+.TP
+\f3log\f1
+Log blocks contain the journal entries for XFS.
+It's not useful to examine these with \f2xfs_db\f1,
+use \f2xfs_logprint\f1(8) instead.
+.TP
+\f3rtbitmap\f1
+If the filesystem has a realtime subvolume, then the \f3rbmino\f1 field
+in the superblock refers to a file that contains the realtime bitmap.
+Each bit in the bitmap file controls the allocation of a single realtime extent
+(set == free).
+The bitmap is processed in 32-bit words,
+the LSB of a word is used for the first extent controlled by that bitmap word.
+The \f3atime\f1 field of the realtime bitmap inode contains a counter
+that is used to control where the next new realtime file will start.
+.TP
+\f3rtsummary\f1
+If the filesystem has a realtime subvolume,
+then the \f3rsumino\f1 field in the superblock refers to a file
+that contains the realtime summary data.
+The summary file contains a two-dimensional array of 16-bit values.
+Each value counts the number of free extent runs
+(consecutive free realtime extents)
+of a given range of sizes that starts in a given bitmap block.
+The size ranges are binary buckets (low size in the bucket is a power of 2).
+There are as many size ranges as are necessary given the size of the
+realtime subvolume.
+The first dimension is the size range,
+the second dimension is the starting bitmap block number
+(adjacent entries are for the same size, adjacent bitmap blocks).
+.TP
+\f3sb\f1
+There is one sb (superblock) structure per allocation group.
+It is the first disk block in the allocation group.
+Only the first one (block 0 of the filesystem) is actually used;
+the other blocks are redundant information for \f2xfs_repair\f1(8)
+to use if the first superblock is damaged.
+Fields defined:
+.br
+\f3magicnum\f1: superblock magic number, 0x58465342 ('XFSB')
+.br
+\f3blocksize\f1: filesystem block size in bytes
+.br
+\f3dblocks\f1: number of filesystem blocks present in the data subvolume
+.br
+\f3rblocks\f1: number of filesystem blocks present in the realtime subvolume
+.br
+\f3rextents\f1: number of realtime extents that \f3rblocks\f1 contain
+.br
+\f3uuid\f1: unique identifier of the filesystem
+.br
+\f3logstart\f1: starting filesystem block number of the log (journal).
+If this value is 0 the log is ``external''
+.br
+\f3rootino\f1: root inode number
+.br
+\f3rbmino\f1: realtime bitmap inode number
+.br
+\f3rsumino\f1: realtime summary data inode number
+.br
+\f3rextsize\f1: realtime extent size in filesystem blocks
+.br
+\f3agblocks\f1: size of an allocation group in filesystem blocks
+.br
+\f3agcount\f1: number of allocation groups
+.br
+\f3rbmblocks\f1: number of realtime bitmap blocks
+.br
+\f3logblocks\f1: number of log blocks (filesystem blocks)
+.br
+\f3versionnum\f1: filesystem version information.
+This value is currently 1, 2, 3, or 4 in the low 4 bits.
+If the low bits are 4 then the other bits have additional meanings.
+1 is the original value.
+2 means that attributes were used.
+3 means that version 2 inodes (large link counts) were used.
+4 is the bitmask version of the version number.
+In this case, the other bits are used as flags
+(0x0010: attributes were used,
+0x0020: version 2 inodes were used,
+0x0040: quotas were used,
+0x0080: inode cluster alignment is in force,
+0x0100: data stripe alignment is in force,
+0x0200: the \f3shared_vn\f1 field is used,
+0x1000: unwritten extent tracking is on,
+0x2000: version 2 directories are in use)
+.br
+\f3sectsize\f1: sector size in bytes, currently always 512.
+This is the size of the superblock and the other header blocks
+.br
+\f3inodesize\f1: inode size in bytes
+.br
+\f3inopblock\f1: number of inodes per filesystem block
+.br
+\f3fname\f1: obsolete, filesystem name
+.br
+\f3fpack\f1: obsolete, filesystem pack name
+.br
+\f3blocklog\f1: log2 of \f3blocksize\f1
+.br
+\f3sectlog\f1: log2 of \f3sectsize\f1
+.br
+\f3inodelog\f1: log2 of \f3inodesize\f1
+.br
+\f3inopblog\f1: log2 of \f3inopblock\f1
+.br
+\f3agblklog\f1: log2 of \f3agblocks\f1 (rounded up)
+.br
+\f3rextslog\f1: log2 of \f3rextents\f1
+.br
+\f3inprogress\f1: \f2mkfs.xfs\f1(8) aborted before completing this filesystem
+.br
+\f3imax_pct\f1: maximum percentage of filesystem space used for inode blocks
+.br
+\f3icount\f1: number of allocated inodes
+.br
+\f3ifree\f1: number of allocated inodes that are not in use
+.br
+\f3fdblocks\f1: number of free data blocks
+.br
+\f3frextents\f1: number of free realtime extents
+.br
+\f3uquotino\f1: user quota inode number
+.br
+\f3pquotino\f1: project quota inode number; this is currently unused
+.br
+\f3qflags\f1: quota status flags
+(0x01: user quota accounting is on,
+0x02: user quota limits are enforced,
+0x04: quotacheck has been run on user quotas,
+0x08: project quota accounting is on,
+0x10: project quota limits are enforced,
+0x20: quotacheck has been run on project quotas)
+.br
+\f3flags\f1: random flags.
+0x01: only read-only mounts are allowed
+.br
+\f3shared_vn\f1: shared version number (shared readonly filesystems)
+.br
+\f3inoalignmt\f1: inode chunk alignment in filesystem blocks
+.br
+\f3unit\f1: stripe or RAID unit
+.br
+\f3width\f1: stripe or RAID width
+.br
+\f3dirblklog\f1: log2 of directory block size (filesystem blocks)
+.TP
+\f3symlink\f1
+Symbolic link blocks are used only when the symbolic link value does
+not fit inside the inode.
+The block content is just the string value.
+Bytes past the logical end of the symbolic link value have arbitrary values.
+.SH DIAGNOSTICS
+Many messages can come from the \f3check\f1 (\f3blockget\f1) command;
+these are documented in \f2xfs_check\f1(8).
+.SH SEE ALSO
+mkfs.xfs(8),
+xfs_check(8),
+xfs_copy(8),
+xfs_logprint(8),
+xfs_ncheck(8),
+xfs_repair(8),
+chmod(2),
+mknod(2),
+stat(2),
+xfs(5).
diff --git a/man/man8/xfs_growfs.8 b/man/man8/xfs_growfs.8
new file mode 100644 (file)
index 0000000..5a2496c
--- /dev/null
@@ -0,0 +1,135 @@
+.TH xfs_growfs 8
+.SH NAME
+xfs_growfs, xfs_info \- expand an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_growfs\f1 [ \f3\-dilnrxV\f1 ] [ \f3\-D\f1 size ] [ \f3\-e\f1 rtextsize ]
+               [ \f3\-L\f1 size ] [ \f3\-m\f1 maxpct ] [ \f3\-t\f1 mtab ]
+               [ \f3\-R\f1 size ] mount-point
+\f3xfs_info\f1 [ \f3\-t\f1 mtab ] mount-point
+.fi
+.SH DESCRIPTION
+.I xfs_growfs
+expands an existing XFS filesystem (see
+.IR xfs (5)).
+The
+.I mount-point
+argument is the pathname of the directory where the filesystem
+is mounted.
+The filesystem must be mounted to be grown (see
+.IR mount (8)).
+The existing contents of the filesystem are undisturbed, and the added space
+becomes available for additional file storage.
+.PP
+.I xfs_info
+is equivalent to invoking
+.I xfs_growfs
+with the
+.B \-n
+option (see discussion below).
+.PP
+The options to
+.I xfs_growfs
+are:
+.TP
+\f3\-d\f1, \f3\-D\f1 \f2size\f1
+Specifies that the data section of the filesystem should be grown.
+If the
+.B \-D
+.I size
+option is given, the data section is grown to that size, otherwise
+the data section is grown to the largest size possible.
+The size
+is expressed in
+filesystem blocks.
+.TP
+.B \-e
+Allows the real-time extent size to be specified.
+In
+.IR mkfs.xfs (8)
+this is specified with
+.B \-r
+.BI extsize= nnnn.
+.TP
+.B \-i
+The new log is an internal log
+(inside the data section).
+.TP
+\f3\-l\f1, \f3\-L\f1 \f2size\f1
+Specifies that the log section of the filesystem should be grown,
+shrunk, or moved.
+If the
+.B \-L
+.I size
+option is given, the log section is changed to be that size,
+if possible.
+The size is expressed in
+filesystem blocks.
+The size of an internal log must be smaller than the size
+of an allocation group (this value is printed at \f2mkfs.xfs\f1(8) time).
+If neither
+.B \-i
+nor
+.B \-x
+is given with
+.BR \-l ,
+the log continues to be internal or external as it was before.
+.TP
+.B \-m
+Specify a new value for the maximum percentage
+of space in the filesystem that can be allocated as inodes.
+In
+.I mkfs.xfs
+this is specified with
+.B \-i
+.BI maxpct= nn.
+.TP
+.B \-n
+Specifies that no change to the filesystem is to be made.
+The filesystem geometry is printed, and argument checking is performed,
+but no growth occurs.
+.TP
+\f3\-r\f1, \f3\-R\f1 \f2size\f1
+Specifies that the real-time section of the filesystem should be grown.
+If the
+.B \-R
+.I size
+option is given, the real-time section is grown to that size, otherwise
+the real-time section is grown to the largest size possible.
+The size
+is expressed in
+filesystem blocks.
+The filesystem does not need to have contained a real-time section before
+the \f2xfs_growfs\f1 operation.
+.TP
+.B \-t
+Specifies an alternate mount table file (default is
+.IR /etc/mtab ).
+This is used when working with filesystems mounted without writing to
+.I /etc/mtab
+file - refer to
+.BR mount (8)
+for further details.
+.PP
+.I xfs_growfs
+is most often used in conjunction with
+logical volumes
+(see
+.IR lvm (8)
+).
+However, it can also be used on a regular disk partition, for example if a
+partition has been enlarged while retaining the same starting block.
+.SH PRACTICAL USE
+Filesystems normally occupy all of the space on the device where they
+reside.
+In order to grow a filesystem, it is necessary to provide added
+space for it to occupy.
+Therefore there must be at least one spare new
+disk partition available.
+Adding the space is done through the mechanism of
+logical volumes.
+.SH SEE ALSO
+mkfs.xfs(8),
+lvm(8),
+mount(8).
diff --git a/man/man8/xfs_logprint.8 b/man/man8/xfs_logprint.8
new file mode 100644 (file)
index 0000000..15ddc18
--- /dev/null
@@ -0,0 +1,86 @@
+.TH xfs_logprint 8
+.SH NAME
+xfs_logprint \- print the log of an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_logprint\f1 [ options ] device-name
+\f3xfs_logprint \-f\f1 [ options ] filename
+.fi
+.SH DESCRIPTION
+.I xfs_logprint
+prints the log of an XFS filesystem (see
+.IR xfs (5)).
+The
+.I device-name
+argument is the pathname of the partition or logical volume
+containing the filesystem.
+The contents of the filesystem remain undisturbed.
+There are two major modes of operation in
+.IR xfs_logprint .
+.PP
+One mode is better for filesystem operation debugging.
+It is called the transactional view and is enabled through the \f3\-t\f1
+option.
+The transactional view prints only the portion of the log that
+pertains to recovery.
+In other words, it prints out complete transactions between the tail
+and the head.
+This view tries to display each transaction without
+regard to how they are split across log records.
+.PP
+The second mode starts printing out information from the beginning of the log.
+Some error blocks might print out in the beginning because the last log
+record usually overlaps the oldest log record.
+A message is
+printed when the physical end of the log is reached and when the
+logical end of the log is reached.
+A log record view is displayed
+one record at a time.
+Transactions that span log records may not be
+decoded fully.
+.PP
+Common options are:
+.TP
+\f3\-b\f1
+Extract and print buffer information.
+Only used in transactional view.
+.TP
+\f3\-D\f1
+Don't decode anything;
+just print data.
+.TP
+\f3\-e\f1
+Exit when an error is found in the log.
+Normally,
+.I xfs_logprint
+tries to continue and unwind from bad logs.
+However, sometimes it just dies in bad ways.
+Using this option prevents core dumps.
+.TP
+\f3\-f\f1
+The log is a file.
+.TP
+\f3\-i\f1
+Extract and print inode information.
+Only used in transactional view.
+.TP
+\f3\-q\f1
+Extract and print quota information.
+Only used in transactional view.
+.TP
+\f3\-n\f1
+Don't try to interpret log data;
+just interpret log header information.
+.TP
+\f3\-o\f1
+Also print buffer data in hex.
+Normally, buffer data is just decoded, so better information can be printed.
+.TP
+\f3\-s\f1 \f2start-block\f1
+Override any notion of where to start printing.
+.TP
+\f3\-t\f1
+Print out the transactional view.
+.SH SEE ALSO
+mkfs.xfs(8),
+mount(8).
diff --git a/man/man8/xfs_mkfile.8 b/man/man8/xfs_mkfile.8
new file mode 100644 (file)
index 0000000..2cc1517
--- /dev/null
@@ -0,0 +1,27 @@
+.TH xfs_mkfile 8
+.SH NAME
+xfs_mkfile \- create an XFS file
+.SH SYNOPSIS
+.nf
+\f3xfs_mkfile\f1 [\f3\-v\f1] [\f3\-n\f1] \c
+\f2size\f1[\f3k\f1|\f3b\f1|\f3m\f1|\f3g\f1] \f2filename\f1...
+.fi
+.SH DESCRIPTION
+.I xfs_mkfile
+creates one or more files.
+The file is padded with zeroes by
+default.
+The default size is in bytes, but it can be
+flagged as kilobytes, blocks, megabytes, or gigabytes with the \f3k\f1,
+\f3b\f1, \f3m\f1, or \f3g\f1 suffixes, respectively.
+.SH OPTIONS
+.TP
+\f3\-v\f1
+Verbose.
+Report the names and sizes of created files.
+.TP
+\f3\-n\f1
+No bytes.
+Create a holey file - that is,
+do not write out any data, just
+seek to end of file and write a block.
diff --git a/man/man8/xfs_ncheck.8 b/man/man8/xfs_ncheck.8
new file mode 100644 (file)
index 0000000..201b43e
--- /dev/null
@@ -0,0 +1,53 @@
+.TH xfs_ncheck 8
+.SH NAME
+xfs_ncheck \- generate pathnames from i-numbers for XFS
+.SH SYNOPSIS
+.nf
+\f3xfs_ncheck\f1 [ \f3\-i\f1 ino ] ... \c
+[ \f3\-s\f1 ] xfs_special
+.sp .8v
+\f3xfs_ncheck\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... \c
+[ \f3\-s\f1 ] file
+.fi
+.SH DESCRIPTION
+.I xfs_ncheck
+with no
+.B \-i
+arguments generates an inode number and pathname list of all
+files on the given filesystem.
+Names of directory files are followed by 
+.BR /. .
+The output is not sorted in any particular order.
+The filesystem to be examined is specified by the
+.I xfs_special
+argument, which should be the disk or volume device for the filesystem.
+Filesystems stored in files can also be checked, using the \f3\-f\f1 flag.
+.PP
+The options to \f2xfs_ncheck\f1 are:
+.TP 9
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been made into an ordinary file.
+.TP
+.B \-s
+Limits the report to special files and files with setuserid mode.
+This option may be used to detect violations of security policy.
+.TP
+.BI \-i " ino"
+Limits the report to only those files whose inode numbers follow.
+May be given multiple times to select multiple inode numbers.
+.PP
+If the filesystem is seriously corrupted, or very busy and looks
+like it is corrupt, a message of the form that would be generated by
+.IR xfs_check (8)
+may appear.
+.PP
+.I xfs_ncheck
+is only useful with XFS filesystems.
+.SH SEE ALSO
+mkfs.xfs(8),
+xfs_ncheck(8),
+xfs_check(8),
+xfs(5).
diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8
new file mode 100644 (file)
index 0000000..014620c
--- /dev/null
@@ -0,0 +1,353 @@
+.TH xfs_repair 8
+.SH NAME
+xfs_repair \- repair an XFS filesystem
+.SH SYNOPSIS
+.nf
+\f3xfs_repair\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] xfs_special
+.sp .8v
+\f3xfs_repair\f1 \f3\-f\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] ... file
+.fi
+.SH DESCRIPTION
+.I xfs_repair
+repairs corrupt or damaged XFS filesystems
+(see
+.IR xfs (5)).
+The filesystem is specified using the
+.I xfs_special
+argument which should be the device name of the
+disk partition or volume containing
+the filesystem.
+If given the name of a block device,
+.I xfs_repair
+will attempt to find the raw device associated
+with the specified block device and will use the raw device
+instead.
+.PP
+Regardless, the filesystem to be repaired
+must be unmounted;
+otherwise, the resulting filesystem may be inconsistent or corrupt.
+.PP
+The options to \f2xfs_repair\f1 are:
+.TP
+.B \-f
+Specifies that the special device is actually a file (see the
+\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
+This might happen if an image copy
+of a filesystem has been copied or written into an ordinary file.
+.TP
+.B \-n
+No modify mode.
+Specifies that
+.I xfs_repair
+should not modify the filesystem but should only scan the
+filesystem and indicate what repairs would have been made.
+.TP
+.B \-o
+Override what the program might conclude about the filesystem
+if left to its own devices.
+.IP
+The
+.B assume_xfs
+suboption
+specifies that the filesystem is an XFS filesystem.
+Normally, if
+.I xfs_repair
+cannot find an XFS superblock, it checks to see if the
+filesystem is an EFS filesystem before it tries to
+regenerate the XFS superblock.
+If the
+.B assume_xfs
+option is in effect,
+.I xfs_repair
+will assume that the filesystem is an XFS filesystem and
+will ignore an EFS superblock if one is found.
+.SS Checks Performed
+Inconsistencies corrected include the following:
+.TP
+1.
+Inode and inode blockmap (addressing) checks:
+bad magic number in inode,
+bad magic numbers in inode blockmap blocks,
+extents out of order,
+incorrect number of records in inode blockmap blocks,
+blocks claimed that are not in a legal data area of the filesystem,
+blocks that are claimed by more than one inode.
+.TP
+2.
+Inode allocation map checks:
+bad magic number in inode map blocks,
+inode state as indicated by map (free or in-use) inconsistent
+with state indicated by the inode,
+inodes referenced by the filesystem that do not appear in
+the inode allocation map,
+inode allocation map referencing blocks that do not appear
+to contain inodes.
+.TP
+3.
+Size checks:
+number of blocks claimed by inode inconsistent with inode size,
+directory size not block aligned,
+inode size not consistent with inode format.
+.TP
+4.
+Directory checks:
+bad magic numbers in directory blocks,
+incorrect number of entries in a directory block,
+bad freespace information in a directory leaf block,
+entry pointing to an unallocated (free) or out
+of range inode,
+overlapping entries,
+missing or incorrect dot and dotdot entries,
+entries out of hashvalue order,
+incorrect internal directory pointers,
+directory type not consistent with inode format and size.
+.TP
+5.
+Pathname checks:
+files or directories not referenced by a pathname starting from
+the filesystem root,
+illegal pathname components.
+.TP
+6.
+Link count checks:
+link counts that do not agree with the number of
+directory references to the inode.
+.TP
+7.
+Freemap checks:
+blocks claimed free by the freemap but also claimed by an inode,
+blocks unclaimed by any inode but not appearing in the freemap.
+.TP
+8.
+Super Block checks:
+total free block and/or free i-node count incorrect,
+filesystem geometry inconsistent,
+secondary and primary superblocks contradictory.
+.PP
+Orphaned files and directories (allocated, in-use but unreferenced) are
+reconnected by placing them in the
+.I lost+found
+directory.
+The name assigned is the inode number.
+.SS Disk Errors
+.I xfs_repair
+aborts on most disk I/O errors.
+Therefore, if you are trying
+to repair a filesystem that was damaged due to a disk drive failure,
+steps should be taken to ensure that
+all blocks in the filesystem are readable and writeable
+before attempting to use
+.I xfs_repair
+to repair the filesystem.
+A possible method is using
+.IR dd (1)
+to copy the data onto a good disk.
+.SS lost+found
+The directory
+.I lost+found
+does not have to already exist in the filesystem being repaired.
+If the directory does not exist, it is automatically created.
+If the \f2lost+found\f1 directory already exists,
+the \f2lost+found\f1
+directory is deleted and recreated every time \f2xfs_repair\f1
+runs.
+This ensures that there are no name conflicts in \f2lost+found\f1.
+However, if you rename a file in \f2lost+found\f1 and leave it there,
+if \f2xfs_repair\f1 is run again, that file is renamed back to
+its inode number.
+.SS Corrupted Superblocks
+XFS has both primary and secondary superblocks.
+\f2xfs_repair\f1 uses information in the primary superblock
+to automatically find and validate the primary superblock
+against the secondary superblocks before proceeding.
+Should the primary be too corrupted to be useful in locating
+the secondary superblocks, the program scans the filesystem
+until it finds and validates some secondary superblocks.
+At that point, it generates a primary superblock.
+.SS Quotas
+If quotas are in use, it is possible that \f2xfs_repair\f1 will clear
+some or all of the filesystem quota information.
+If so, the program issues a warning just before it terminates.
+If all quota information is lost, quotas are disabled and the
+program issues a warning to that effect.
+.PP
+Note that \f2xfs_repair\f1 does not check the validity of quota limits.
+It is recommended that you check the quota limit information manually
+after \f2xfs_repair\f1.
+Also, space usage information is automatically regenerated the
+next time the filesystem is mounted with quotas turned on, so the
+next quota mount of the filesystem may take some time.
+.SH DIAGNOSTICS
+.I xfs_repair
+issues informative messages as it proceeds
+indicating what it has found that is abnormal or any corrective
+action that it has taken.
+Most of the messages are completely understandable only to those
+who are knowledgeable about the structure of the filesystem.
+Some of the more common messages are explained here.
+Note that the language of the messages is slightly different
+if \f2xfs_repair\f1 is run in no-modify mode because the program is not
+changing anything on disk.
+No-modify mode indicates what it would do to repair the filesystem
+if run without the no-modify flag.
+.PP
+disconnected inode \f3xxxx\f1, moving to \f2lost+found\f1
+.IP
+An inode numbered
+.B xxxx
+was not connected to the filesystem
+directory tree and was reconnected to the \f2lost+found\f1 directory.
+The inode is assigned the name of its inode number (i-number).
+If a \f2lost+found\f1 directory does not exist, it is automatically
+created.
+.PP
+disconnected dir inode \f3xxxx\f1, moving to \f2lost+found\f1
+.IP
+As above, except that the inode is a directory inode.
+If a directory inode is attached to \f2lost+found\f1, all of its
+children (if any) stay attached to the directory and therefore
+get automatically reconnected when the directory is reconnected.
+.PP
+imap claims in-use inode \f3xxxx\f1 is free, correcting imap
+.IP
+The inode allocation map thinks that inode \f3xxxx\f1 is
+free whereas examination of the inode indicates that the
+inode may be in use (although it may be disconnected).
+The program updates the inode allocation map.
+.PP
+imap claims free inode \f3xxxx\f1 is in use, correcting imap
+.IP
+The inode allocation map thinks that inode \f3xxxx\f1 is
+in use whereas examination of the inode indicates that the
+inode is not in use and therefore is free.
+The program updates the inode allocation map.
+.PP
+resetting inode \f3xxxx\f1 nlinks from \f3x\f1 to \f3y\f1
+.IP
+The program detected a mismatch between the
+number of valid directory entries referencing inode \f3xxxx\f1
+and the number of references recorded in the inode and corrected
+the number in the inode.
+.PP
+\f3fork-type\f1 fork in ino \f3xxxx\f1 claims used block \f3yyyy\f1
+.IP
+Inode \f3xxxx\f1 claims a block \f3yyyy\f1 that is used (claimed)
+by either another inode or the filesystem itself for metadata storage.
+The \f3fork-type\f1 is either \f3data\f1 or \f3attr\f1
+indicating whether the problem lies in the portion of the
+inode that tracks regular data or the portion of the inode
+that stores XFS attributes.
+If the inode is a real-time (rt) inode, the message says so.
+Any inode that claims blocks used by the filesystem is deleted.
+If two or more inodes claim the same block, they are both deleted.
+.PP
+\f3fork-type\f1 fork in ino \f3xxxx\f1 claims dup extent ...
+.IP
+Inode \f3xxxx\f1 claims a block in an extent known to be
+claimed more than once.
+The offset in the inode, start and length of the extent is given.
+The message is slightly different
+if the inode is a real-time (rt) inode and the extent is therefore
+a real-time (rt) extent.
+.PP
+inode \f3xxxx\f1 - bad extent ...
+.IP
+An extent record in the blockmap of inode \f3xxxx\f1 claims
+blocks that are out of the legal range of the filesystem.
+The message supplies the start, end, and file offset of
+the extent.
+The message is slightly different
+if the extent is a real-time (rt) extent.
+.PP
+bad \f3fork-type\f1 fork in inode \f3xxxx\f1
+.IP
+There was something structurally wrong or inconsistent with the
+data structures that map offsets to filesystem blocks.
+.PP
+cleared inode \f3xxxx\f1
+.IP
+There was something wrong with the inode that
+was uncorrectable so the program freed the inode.
+This usually happens because the inode claims
+blocks that are used by something else or the inode itself
+is badly corrupted.
+Typically, this message
+is preceded by one or more messages indicating why the
+inode needed to be cleared.
+.PP
+bad attribute fork in inode \f3xxxx\f1, clearing attr fork
+.IP
+There was something wrong with the portion of the inode that
+stores XFS attributes (the attribute fork) so the program reset
+the attribute fork.
+As a result of this, all attributes on that inode are lost.
+.PP
+correcting nextents for inode \f3xxxx\f1, was \f3x\f1 - counted \f3y\f1
+.IP
+The program found that the number of extents used to store
+the data in the inode is wrong and corrected the number.
+The message refers to nextents if the count is wrong
+on the number of extents used to store attribute information.
+.PP
+entry \f3"name"\f1 in dir \f3xxxx\f1 not consistent
+with ..
+value (\f3yyyy\f1) in dir ino \f3xxxx\f1,
+junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.IP
+The entry \f3"name"\f1 in directory inode \f3xxxx\f1 references a
+directory inode \f3yyyy\f1.
+However, the ..\& entry in directory \f3yyyy\f1 does not point
+back to directory \f3xxxx\f1,
+so the program deletes the entry \f3"name"\f1 in directory inode
+\f3xxxx\f1.
+If the directory inode \f3yyyy\f1 winds up becoming a disconnected
+inode as a result of this, it is moved to \f2lost+found\f1 later.
+.PP
+entry \f3"name"\f1 in dir \f3xxxx\f1 references already
+connected dir ino \f3yyyy\f1,
+junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.IP
+The entry \f3"name"\f1 in directory inode \f3xxxx\f1 points to a
+directory inode \f3yyyy\f1 that is known to be a child of another
+directory.
+Therefore, the entry is invalid and is deleted.
+This message refers to an entry in a small directory.
+If this were a large directory, the last phrase would read
+"will clear entry".
+.PP
+entry references free inode \f3xxxx\f1 in directory \f3yyyy\f1,
+will clear entry
+.IP
+An entry in directory inode \f3yyyy\f1 references an inode \f3xxxx\f1
+that is known to be free.
+The entry is therefore invalid and is deleted.
+This message refers to a large directory.
+If the directory were small, the message would read "junking entry ...".
+.SH EXIT STATUS
+.I xfs_repair -n
+(no modify mode)
+will return a status of 1 if filesystem corruption was detected and
+0 if no filesystem corruption was detected.
+.I xfs_repair
+run without the -n option will always return a status code of 0.
+.SH BUGS
+.I xfs_repair
+does not do a thorough job on XFS extended attributes.
+The structure of the attribute fork will be consistent,
+but only the contents of attribute forks that will fit into
+an inode are checked.
+This limitation will be fixed in the future.
+.PP
+The no-modify mode (\f3\-n\f1 option) is not completely
+accurate.
+It does not catch inconsistencies in the freespace and inode
+maps, particularly lost blocks or subtly corrupted maps (trees).
+.PP
+The no-modify mode can generate repeated warnings about
+the same problems because it cannot fix the problems as they
+are encountered.
+.SH SEE ALSO
+dd(1),
+mkfs.xfs(8),
+xfs_check(8),
+xfs(5).
diff --git a/mkfile/Makefile b/mkfile/Makefile
new file mode 100644 (file)
index 0000000..fc274e8
--- /dev/null
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# The xfs_mkfile command is built from a single C source file.
+CMDTARGET = xfs_mkfile
+CFILES = xfs_mkfile.c
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+# Install step: the first $(INSTALL) call is passed -d with the destination
+# directory (presumably creating it); the second places the built command
+# there.  Both use mode 755.
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR)
diff --git a/mkfile/xfs_mkfile.c b/mkfile/xfs_mkfile.c
new file mode 100644 (file)
index 0000000..f880d93
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/* 
+ * Make file utility for xfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <malloc.h>
+#include <errno.h>
+#include <libxfs.h>
+
+#undef O_DIRECT
+#define O_DIRECT 0     /* nathans TODO - remove this when direct IO done */
+
+#define        MAXBUFFERSIZE   (256 * 1024)
+
+static void usage(void);
+
+int
+main(int argc, char **argv)
+{
+       int fd;
+       loff_t result;
+       loff_t size = 0;
+       loff_t mult = 0;
+       int bytes = 0;
+       loff_t wrote = 0;
+       int len = 0;
+       int c;
+       int errflg = 0;
+       int errs = 0;
+       int nobytes = 0;
+       int prealloc = 0;
+       int verbose = 0;
+       struct dioattr da;
+       char *progname;
+       void *buf = NULL;
+       int buflen = 0, nbuflen;
+       int bufalign = 0, nbufalign, bufmin;
+       int oflags;
+       xfs_flock64_t flck;
+
+       progname = basename(argv[0]);
+       while ((c = getopt(argc, argv, "npvV")) != EOF) {
+               switch(c) {
+                       case 'n':
+                               nobytes++;
+                               break;
+                       case 'p':
+                               prealloc++;
+                               break;
+                       case 'v':
+                               verbose++;
+                               break;
+                       case 'V':
+                               printf("%s version %s\n", progname, VERSION);
+                               break;
+                       default:
+                               errflg++;
+                               break;
+               }
+       }
+
+       if (argc < optind + 2 || errflg)
+               usage();
+
+       mult = 1;
+
+       len = strlen(argv[optind]);
+
+       if (isalpha(argv[optind][len-1])) {
+               switch (argv[optind][len-1]) {
+               case 'k':
+               case 'K':
+                       mult = 1024;
+                       break;
+               case 'b':
+               case 'B':
+                       mult = 512;
+                       break;
+               case 'm':
+               case 'M':
+                       mult  = 1024;
+                       mult *= 1024;
+                       break;
+               case 'g':
+               case 'G':
+                       mult  = 1024;
+                       mult *= 1024;
+                       mult *= 1024;
+                       break;
+               default:
+                       fprintf(stderr, "unknown size %s\n", argv[optind]);
+                       usage();
+               }
+
+               argv[optind][len-1] = '\0';
+       }
+
+       size = atoll(argv[optind]) * mult;
+
+       optind++;
+
+       while (optind < argc) {
+               if (verbose)
+                       fprintf(stdout, "%s %lld bytes %s\n",
+                                               argv[optind], size,
+                                               prealloc
+                                                 ? "(pre-allocated)"
+                                                 : "");
+
+               oflags = O_CREAT|O_TRUNC|O_WRONLY|(nobytes ? 0 : O_DIRECT);
+
+               fd = open(argv[optind], oflags, 0600);
+
+               if (   (oflags & O_DIRECT)
+                   && (   (fd < 0 && errno == EINVAL)
+                       || ioctl(fd, XFS_IOC_DIOINFO, &da) < 0)) {
+
+                       close(fd);
+
+                       oflags &= ~O_DIRECT;
+
+                       fd = open(argv[optind], oflags, 0600);
+               }
+
+               if (fd < 0) {
+                       perror(argv[optind]);
+                       optind++;
+                       errs++;
+                       continue;
+               }
+
+               if (size == 0) {
+                       close(fd);
+                       optind++;
+                       continue;
+               }
+
+               if ((result = lseek64(fd, size - 1, SEEK_SET)) < 0LL) {
+                       /*
+                        * This check doesn't actually work for 6.2
+                        * efs and nfs2, although it should.
+                        */
+                       fprintf(stderr,
+                               "lseek64 error, result = %lld\n", result);
+                       if (errno)
+                               perror(argv[optind]);
+                       errs++;
+               } else if (nobytes) {
+                       if (write(fd, "", 1) < 0) {
+                               perror(argv[optind]);
+                               errs++;
+                       }
+               } else {
+                       flck.l_whence = SEEK_SET;
+                       flck.l_start  = 0LL;
+                       flck.l_len    = size;
+#if 0
+                       (void)ioctl(fd, XFS_IOC_RESVSP64, &flck);
+
+                       if (prealloc) {
+                               if ( close(fd) < 0 ) {
+                                       perror(argv[optind]);
+                                       unlink(argv[optind]);
+                                       errs++;
+                               }
+
+                               optind++;
+
+                               continue;
+                       }
+#endif
+                       if (oflags & O_DIRECT) {
+                               nbufalign = da.d_mem;
+
+                               if (   da.d_miniosz <= MAXBUFFERSIZE
+                                   && MAXBUFFERSIZE <= da.d_maxiosz)
+                                       nbuflen = MAXBUFFERSIZE;
+                               else if (da.d_maxiosz < MAXBUFFERSIZE)
+                                       nbuflen = da.d_maxiosz;
+                               else
+                                       nbuflen = da.d_miniosz;
+
+                               bufmin = da.d_miniosz;
+                       } else {
+                               nbuflen = MAXBUFFERSIZE;
+                               nbufalign = sizeof(long);
+                               bufmin = 0;
+                       }
+
+                       if (nbuflen > buflen || nbufalign > bufalign) {
+                               if (buf)
+                                       free(buf);
+                               buf = memalign(nbufalign, nbuflen);
+                               buflen = nbuflen;
+                               bzero(buf, nbuflen);
+                               nbufalign = bufalign;
+                       }
+
+                       wrote = 0;
+
+                       lseek64(fd, 0LL, SEEK_SET);
+
+                       while (wrote < size) {
+                               if (size - wrote >= buflen)
+                                       bytes = buflen;
+                               else if (bufmin)
+                                       bytes = roundup(size - wrote, bufmin);
+                               else
+                                       bytes = size - wrote;
+
+                               len = write(fd, buf, bytes);
+
+                               if (len < 0) {
+                                       perror(argv[optind]);
+                                       unlink(argv[optind]);
+                                       errs++;
+                                       break;
+                               }
+
+                               wrote += len;
+                       }
+
+                       if (wrote > size && ftruncate64(fd, size) < 0) {
+                               perror(argv[optind]);
+                               unlink(argv[optind]);
+                               errs++;
+                       }
+               }
+
+               if ( close(fd) < 0 ) {
+                       perror(argv[optind]);
+                       unlink(argv[optind]);
+                       errs++;
+               }
+
+               optind++;
+       }
+
+       return errs != 0;
+}
+
+/*
+ * Print a one-line usage summary on stderr and terminate the program
+ * with exit status 2.  Does not return.
+ */
+static void
+usage(void)
+{
+       fprintf(stderr,
+               "xfs_mkfile: [-npvV] <size> <name1> [<name2>] ...\n");
+       exit(2);
+}
diff --git a/mkfs/Makefile b/mkfs/Makefile
new file mode 100644 (file)
index 0000000..d6f813d
--- /dev/null
@@ -0,0 +1,59 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+# Command built in this directory and copied by the install rule.
+CMDTARGET = mkfs.xfs
+CMDDEPS        = $(LIBXFS)
+# Name of the build-time helper program; its stdout becomes $(MAXTRRES).h.
+MAXTRRES = maxtrres
+
+CFILES = xfs_mkfs.c mountinfo.c proto.c
+HFILES = xfs_mkfs.h mountinfo.h proto.h volume.h
+LLDLIBS = $(LIBXFS) $(LIBUUID) $(LIBLVM)
+# Libraries named on the helper's link line (see the $(MAXTRRES) rule).
+MAXTRLIBS = $(LIBXFS) $(LIBUUID)
+# NOTE(review): LSRCFILES and LDIRT are interpreted by the shared build rules,
+# presumably as "extra source to ship" and "extra files to clean" -- confirm.
+LSRCFILES = $(MAXTRRES).c
+LDIRT = $(MAXTRRES) $(MAXTRRES).h
+
+# The generated header is listed ahead of the command target, presumably so
+# that it exists before the mkfs sources are compiled -- confirm.
+default: $(MAXTRRES).h $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
+
+# Build-time helper, compiled straight from its single source file.  The
+# source is listed as a prerequisite so that editing it relinks the helper
+# and, through the rule below, regenerates the header.
+$(MAXTRRES): $(MAXTRRES).c
+       $(CCF) $@.c -o $@ $(LDFLAGS) $(MAXTRLIBS)
+
+# Generated header: the helper's stdout.  If the helper fails, the partial
+# header is removed so a later make does not treat it as up to date.
+$(MAXTRRES).h: $(MAXTRRES)
+       ./$(MAXTRRES) > $@ || ( rm -f $@ && exit 1 )
diff --git a/mkfs/maxtrres.c b/mkfs/maxtrres.c
new file mode 100644 (file)
index 0000000..638d945
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * maxtrres
+ * 
+ * Compute the maximum transaction reservation for every legal
+ * combination of block size, inode size, directory version, 
+ * and directory block size.
+ * Generates a table compiled into mkfs, to control the default
+ * and minimum log sizes.
+ */
+
+#include <libxfs.h>
+#include "xfs_mkfs.h"
+
+xfs_trans_reservations_t tr_count = {  /* log count per reservation; read in step with m_reservations by max_trans_res() */
+       XFS_WRITE_LOG_COUNT,            /* extent alloc trans */
+       XFS_ITRUNCATE_LOG_COUNT,        /* truncate trans */
+       XFS_RENAME_LOG_COUNT,           /* rename trans */
+       XFS_LINK_LOG_COUNT,             /* link trans */
+       XFS_REMOVE_LOG_COUNT,           /* unlink trans */
+       XFS_SYMLINK_LOG_COUNT,          /* symlink trans */
+       XFS_CREATE_LOG_COUNT,           /* create trans */
+       XFS_MKDIR_LOG_COUNT,            /* mkdir trans */
+       XFS_DEFAULT_LOG_COUNT,          /* inode free trans */
+       XFS_DEFAULT_LOG_COUNT,          /* inode update trans */
+       XFS_DEFAULT_LOG_COUNT,          /* fs data section grow trans */
+       XFS_DEFAULT_LOG_COUNT,          /* sync write inode trans */
+       XFS_ADDAFORK_LOG_COUNT,         /* cvt inode to attributed trans */
+       XFS_DEFAULT_LOG_COUNT,          /* write setuid/setgid file */
+       XFS_ATTRINVAL_LOG_COUNT,        /* attr fork buffer invalidation */
+       XFS_ATTRSET_LOG_COUNT,          /* set/create an attribute */
+       XFS_ATTRRM_LOG_COUNT,           /* remove an attribute */
+       XFS_DEFAULT_LOG_COUNT,          /* clear bad agi unlinked ino bucket */
+       XFS_DEFAULT_PERM_LOG_COUNT,     /* grow realtime allocations */
+       XFS_DEFAULT_LOG_COUNT,          /* grow realtime zeroing */
+       XFS_DEFAULT_LOG_COUNT,          /* grow realtime freeing */
+};     /* NOTE(review): entry order presumably mirrors the struct's field order -- confirm */
+
+static int
+max_trans_res(
+       xfs_mount_t                     *mp,
+       int                             *mul)
+{
+       uint                            *p;
+       uint                            *q;
+       int                             rval;
+       xfs_trans_reservations_t        *tr;
+       xfs_da_args_t                   args;
+       int                             local;
+       int                             size;
+       int                             nblks;
+       int                             res;
+
+       nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+
+       /*
+        * Fill in the arg structure for this request.
+        */
+       bzero(&args, sizeof(args));
+       args.name = NULL;
+       args.namelen = MAXNAMELEN;
+       args.value = NULL;
+       args.valuelen = 65536;
+       args.flags = 0;
+       args.hashval = 0;
+       args.dp = NULL;
+       args.firstblock = NULL;
+       args.flist = NULL;
+       args.whichfork = XFS_ATTR_FORK;
+       args.oknoent = 1;
+
+       /*
+        * Determine space new attribute will use, and if it will be
+        * inline or out of line.
+        */
+       size = libxfs_attr_leaf_newentsize(
+                       &args, mp->m_sb.sb_blocksize, &local);
+
+       if (local) {
+               printf("Uh-oh.. attribute is local\n");
+       } else {
+               /* Out of line attribute, cannot double split, but make
+                * room for the attribute value itself.
+                */
+               nblks += XFS_B_TO_FSB(mp, size);
+               nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
+       }
+       res = XFS_ATTRSET_LOG_RES(mp, nblks);
+#if 0
+       printf("size = %d nblks = %d res = %d\n", size, nblks, res);
+#endif
+       mp->m_reservations.tr_attrset = res;
+
+       for (rval = 0, tr = &mp->m_reservations, p = (uint *)tr,
+            q = (uint *)&tr_count;
+            p < (uint *)(tr + 1);
+            p++, q++) {
+               if ((int)*p > rval) {
+                       rval = (int)*p;
+                       *mul = (int)*q;
+               }
+       }
+       return rval;
+}
+
+/*
+ * Write one "#define MAXTRRES_B<b>_I<i>_D<d>_V<v>" line to stdout for each
+ * combination of block size, inode size, directory block size and directory
+ * version accepted by the loops below.  Takes no arguments; any argument
+ * produces a usage message and exit status 1.
+ */
+int
+main(int argc, char **argv)
+{
+       xfs_mount_t     mount;
+       xfs_sb_t        *sb;
+       int             blklog;         /* log2 of the filesystem block size */
+       int             inolog;         /* log2 of the inode size */
+       int             dirlog;         /* log2 of the directory block size */
+       int             dirvers;        /* directory version, 1 or 2 */
+       int             maxres;
+       int             factor;
+
+       progname = basename(argv[0]);
+       if (argc > 1) {
+               fprintf(stderr, "Usage: %s\n", progname);
+               return 1;
+       }
+
+       memset(&mount, 0, sizeof(mount));
+       sb = &mount.m_sb;
+       sb->sb_magicnum = XFS_SB_MAGIC;
+       sb->sb_sectlog = 9;
+       sb->sb_sectsize = 1 << sb->sb_sectlog;
+
+       for (blklog = XFS_MIN_BLOCKSIZE_LOG;
+            blklog <= XFS_MAX_BLOCKSIZE_LOG;
+            blklog++) {
+               sb->sb_blocklog = blklog;
+               sb->sb_blocksize = 1 << blklog;
+               sb->sb_agblocks = XFS_AG_MIN_BYTES / (1 << blklog);
+
+               for (inolog = XFS_DINODE_MIN_LOG;
+                    inolog <= XFS_DINODE_MAX_LOG;
+                    inolog++) {
+                       /* skip inode sizes too large for this block size */
+                       if ((1 << inolog) >
+                           (1 << blklog) / XFS_MIN_INODE_PERBLOCK)
+                               continue;
+                       sb->sb_inodelog = inolog;
+                       sb->sb_inopblog = blklog - inolog;
+                       sb->sb_inodesize = 1 << inolog;
+                       sb->sb_inopblock = 1 << (blklog - inolog);
+
+                       for (dirlog = blklog;
+                            dirlog <= XFS_MAX_BLOCKSIZE_LOG;
+                            dirlog++) {
+                               sb->sb_dirblklog = dirlog - blklog;
+
+                               /*
+                                * Version 1 is only emitted when the
+                                * directory block size equals the
+                                * filesystem block size.
+                                */
+                               dirvers = (dirlog == blklog) ? 1 : 2;
+                               for (; dirvers <= 2; dirvers++) {
+                                       sb->sb_versionnum = XFS_SB_VERSION_4;
+                                       if (dirvers == 2)
+                                               sb->sb_versionnum |=
+                                                   XFS_SB_VERSION_DIRV2BIT;
+                                       libxfs_mount(&mount, sb, 0, 0, 0, 0);
+                                       maxres = max_trans_res(&mount,
+                                                              &factor);
+                                       printf(
+                               "#define\tMAXTRRES_B%d_I%d_D%d_V%d\t%lld\t"
+                               "/* LOG_FACTOR %d */\n",
+                                               blklog, inolog, dirlog,
+                                               dirvers,
+                                               XFS_B_TO_FSB(&mount, maxres),
+                                               factor);
+                                       libxfs_umount(&mount);
+                               }
+                       }
+               }
+       }
+       return 0;
+}
diff --git a/mkfs/proto.c b/mkfs/proto.c
new file mode 100644 (file)
index 0000000..8570d14
--- /dev/null
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "proto.h"
+
+/*
+ * Prototypes for internal functions.
+ */
+extern long long cvtnum(int blocksize, char *s);
+extern void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp,
+       char *name); 
+static long getnum(char **pp);
+static char *getstr(char **pp);
+static void fail(char *msg, int i);
+static void getres(xfs_trans_t *tp, uint blocks);
+static void rsvfile(xfs_mount_t *mp, xfs_inode_t *ip, long long len);
+static int newfile(xfs_trans_t *tp, xfs_inode_t *ip, xfs_bmap_free_t *flist,
+       xfs_fsblock_t *first, int dolocal, int logit, char *buf, int len);
+static char *newregfile(char **pp, int *len); 
+static void rtinit(xfs_mount_t *mp);
+static long filesize(int fd);
+
+/*
+ * Use this for block reservations needed for mkfs's conditions
+ * (basically no fragmentation).
+ *
+ * Both macros expand to expressions that mention a variable named "mp"
+ * which is not a macro parameter, so an "mp" must be in scope wherever
+ * they are used.  MKFS_BLOCKRES(rb) adds rb to MKFS_BLOCKRES_INODE and
+ * the other fixed terms; the result is cast to uint.
+ */
+#define        MKFS_BLOCKRES_INODE     \
+       ((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1)))
+#define        MKFS_BLOCKRES(rb)       \
+       ((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \
+       (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb)))
+
+
+/*
+ * Load the prototype description text.
+ *
+ * fname: path of the prototype file, or NULL to get the built-in default
+ *        string "d--755 0 0 $".
+ *
+ * For a file, the whole content is read into a freshly allocated,
+ * NUL-terminated buffer.  The file must be non-empty and end in a newline.
+ * The three leading compatibility tokens are consumed, and the returned
+ * pointer addresses the text that follows them.  The buffer is never freed
+ * here, and the returned pointer is not the start of the allocation.
+ *
+ * Any failure prints a message on stderr and exits with status 1.
+ */
+char *
+setup_proto(
+       char    *fname)
+{
+       char            *buf;
+       static char     dflt[] = "d--755 0 0 $";
+       int             fd;
+       long            size;
+
+       if (!fname)
+               return dflt;
+       if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+               fprintf(stderr, "%s: failed to open %s: %s\n",
+                       progname, fname, strerror(errno));
+               exit(1);
+       }
+       buf = malloc(size + 1);
+       if (buf == NULL) {
+               fprintf(stderr, "%s: cannot allocate %ld bytes for %s\n",
+                       progname, size + 1, fname);
+               exit(1);
+       }
+       if (read(fd, buf, size) < size) {
+               fprintf(stderr, "%s: read failed on %s: %s\n",
+                       progname, fname, strerror(errno));
+               exit(1);
+       }
+       /* Everything needed is in memory now; do not leak the descriptor. */
+       close(fd);
+       /* An empty file has no last byte to inspect. */
+       if (size == 0 || buf[size - 1] != '\n') {
+               fprintf(stderr, "%s: proto file %s premature EOF\n",
+                       progname, fname);
+               exit(1);
+       }
+       buf[size] = '\0';
+       /*
+        * Skip past the stuff there for compatibility, a string and 2 numbers.
+        */
+       (void)getstr(&buf);     /* boot image name */
+       (void)getnum(&buf);     /* block count */
+       (void)getnum(&buf);     /* inode count */
+       return buf;
+}
+
+/*
+ * Consume the next token from the prototype text with getstr() and return
+ * its value as converted by atol().
+ */
+static long
+getnum(
+       char    **pp)
+{
+       return atol(getstr(pp));
+}
+
+static void
+fail(                  /* print "<progname>: <msg> <i>" on stderr, then exit(1) */
+       char    *msg,   /* text printed after the program name */
+       int     i)      /* number printed after msg; callers in this file pass an error code */
+{
+       fprintf(stderr, "%s: %s %d\n", progname, msg, i);
+       ASSERT(0);      /* NOTE(review): presumably traps here in debug builds -- confirm */
+       exit(1);
+}
+
+/*
+ * Reserve block space for transaction tp.
+ *
+ * The first attempt asks for MKFS_BLOCKRES(blocks); on failure the request
+ * is reduced one block at a time, down to and including "blocks" itself.
+ * Returns as soon as one reservation succeeds.  If every attempt fails,
+ * res_failed() is called with the last error.
+ */
+static void
+getres(
+       xfs_trans_t     *tp,
+       uint            blocks)
+{
+       int             i;
+       xfs_mount_t     *mp;    /* referenced implicitly by MKFS_BLOCKRES */
+       uint            r;
+
+       mp = tp->t_mountp;
+       for (i = 0, r = MKFS_BLOCKRES(blocks); r >= blocks; r--) {
+               i = libxfs_trans_reserve(tp, r, 0, 0, 0, 0);
+               if (i == 0)
+                       return;
+               /*
+                * r is unsigned: when blocks is 0, decrementing past zero
+                * would wrap and "r >= blocks" could never become false.
+                */
+               if (r == 0)
+                       break;
+       }
+       res_failed(i);
+       /* NOTREACHED */
+}
+
+/*
+ * Return the next token from the prototype text at *pp.
+ *
+ * Blanks, tabs and newlines separate tokens.  A ':' where a token would
+ * start begins a comment that runs to the end of the line.  The token is
+ * NUL-terminated in place (the buffer is modified) and *pp is advanced
+ * past it.
+ *
+ * If the text ends before a token is found, a message is printed and the
+ * program exits with status 1, so callers never see a NULL return.
+ */
+static char *
+getstr(
+       char    **pp)
+{
+       int     c;
+       char    *p;
+       char    *rval;
+
+       p = *pp;
+       while ((c = *p) != '\0') {
+               switch (c) {
+               case ' ':
+               case '\t':
+               case '\n':
+                       p++;
+                       continue;
+               case ':':
+                       /*
+                        * Skip the comment including its newline, but
+                        * never step over the terminating NUL.
+                        */
+                       p++;
+                       while (*p != '\0' && *p != '\n')
+                               p++;
+                       if (*p == '\n')
+                               p++;
+                       continue;
+               default:
+                       rval = p;
+                       while (c != ' ' && c != '\t' && c != '\n' && c != '\0')
+                               c = *++p;
+                       /*
+                        * Overwrite the delimiter and move past it.  When
+                        * the token is ended by the buffer's own NUL, stay
+                        * on it so the next call sees end of input instead
+                        * of reading beyond the buffer.
+                        */
+                       if (c != '\0')
+                               *p++ = '\0';
+                       *pp = p;
+                       return rval;
+               }
+       }
+       fprintf(stderr, "%s: premature EOF in prototype file\n",
+               progname);
+       exit(1);
+       /* NOTREACHED */
+       return NULL;
+}
+
+/*
+ * Preallocate llen bytes of space for inode ip starting at offset 0, then
+ * run a second transaction that clears the setuid bit (and the setgid bit
+ * when group-execute is set), refreshes the modification/change times and
+ * marks the inode as preallocated.
+ */
+static void
+rsvfile(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *ip,
+       long long       llen)
+{
+       xfs_trans_t     *trans;
+       int             err;
+
+       err = libxfs_alloc_file_space(ip, 0, llen, 1, 0);
+       if (err) {
+               fail("error reserving space for a file", err);
+               exit(1);
+       }
+
+       /*
+        * Second step: push the inode's updated mode, timestamps and
+        * prealloc flag through a transaction of their own.
+        */
+       trans = libxfs_trans_alloc(mp, 0);
+       libxfs_trans_ijoin(trans, ip, 0);
+       libxfs_trans_ihold(trans, ip);
+
+       ip->i_d.di_mode &= ~ISUID;
+
+       /*
+        * ISGID is only dropped when the group execute bit is set.  With
+        * that bit set mandatory locking was not in effect anyway, so
+        * clearing ISGID here cannot turn it off.
+        */
+       if (ip->i_d.di_mode & (IEXEC >> 3))
+               ip->i_d.di_mode &= ~ISGID;
+
+       libxfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+
+       libxfs_trans_log_inode(trans, ip, XFS_ILOG_CORE);
+       libxfs_trans_commit(trans, 0, NULL);
+}
+
+/*
+ * Store len bytes from buf as the contents of inode ip.
+ *
+ * When dolocal is set and the data fits in the inode's data fork, it is
+ * kept inline.  Otherwise, for len > 0, a single extent is allocated and
+ * the data is copied into a buffer covering it, zero-padded to the end of
+ * the buffer.  That buffer is logged in tp when logit is set and written
+ * out directly when it is not.
+ *
+ * Returns the extra inode log flags the caller must add: XFS_ILOG_DDATA
+ * for inline data, 0 otherwise.
+ */
+static int
+newfile(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_bmap_free_t *flist,
+       xfs_fsblock_t   *first,
+       int             dolocal,
+       int             logit,
+       char            *buf,
+       int             len)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       int             logflags = 0;
+       xfs_bmbt_irec_t map;
+       xfs_extlen_t    nblocks;
+       xfs_daddr_t     daddr;
+       xfs_buf_t       *bp;
+       int             nmap;
+       int             error;
+
+       if (dolocal && len <= XFS_IFORK_DSIZE(ip)) {
+               /* inline: the data lives in the inode's data fork */
+               libxfs_idata_realloc(ip, len, XFS_DATA_FORK);
+               if (buf)
+                       bcopy(buf, ip->i_df.if_u1.if_data, len);
+               ip->i_d.di_size = len;
+               ip->i_df.if_flags &= ~XFS_IFEXTENTS;
+               ip->i_df.if_flags |= XFS_IFINLINE;
+               ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+               logflags = XFS_ILOG_DDATA;
+       } else if (len > 0) {
+               /* out of line: one extent must cover the whole file */
+               nblocks = XFS_B_TO_FSB(mp, len);
+               nmap = 1;
+               error = libxfs_bmapi(tp, ip, 0, nblocks, XFS_BMAPI_WRITE,
+                               first, nblocks, &map, &nmap, flist);
+               if (error)
+                       fail("error allocating space for a file", error);
+               if (nmap != 1) {
+                       fprintf(stderr, "%s: cannot allocate space for file\n",
+                               progname);
+                       exit(1);
+               }
+               daddr = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+               bp = libxfs_trans_get_buf(logit ? tp : 0, mp->m_dev, daddr,
+                       nblocks << mp->m_blkbb_log, 0);
+               bcopy(buf, XFS_BUF_PTR(bp), len);
+               if (len < XFS_BUF_COUNT(bp))
+                       bzero(XFS_BUF_PTR(bp) + len, XFS_BUF_COUNT(bp) - len);
+               if (!logit)
+                       libxfs_writebuf(bp, 1);
+               else
+                       libxfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
+       }
+       ip->i_d.di_size = len;
+       return logflags;
+}
+
+/*
+ * Read the host file named by the next prototype token into memory.
+ *
+ * pp:  cursor into the prototype text; advanced past the file name.
+ * len: out parameter; receives the file's size in bytes.
+ *
+ * Returns a malloc'ed buffer holding the file's contents, which the caller
+ * frees, or NULL when the file is empty.  Any failure (open, size too big
+ * for an int, allocation, read) prints a message and exits with status 1.
+ */
+static char *
+newregfile(
+       char            **pp,
+       int             *len)
+{
+       char            *buf;
+       int             fd;
+       char            *fname;
+       long            size;
+
+       fname = getstr(pp);
+       if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+               fprintf(stderr, "%s: cannot open %s: %s\n",
+                       progname, fname, strerror(errno));
+               exit(1);
+       }
+       /*
+        * The length is reported through an int; refuse a size that would
+        * not survive the conversion rather than silently truncating it.
+        */
+       if ((long)(int)size != size) {
+               fprintf(stderr, "%s: %s is too large (%ld bytes)\n",
+                       progname, fname, size);
+               exit(1);
+       }
+       *len = (int)size;
+       if (size == 0) {
+               buf = NULL;
+       } else {
+               buf = malloc(size);
+               if (buf == NULL) {
+                       fprintf(stderr,
+                               "%s: cannot allocate %ld bytes for %s\n",
+                               progname, size, fname);
+                       exit(1);
+               }
+               if (read(fd, buf, size) < size) {
+                       fprintf(stderr, "%s: read failed on %s: %s\n",
+                               progname, fname, strerror(errno));
+                       exit(1);
+               }
+       }
+       close(fd);
+       return buf;
+}
+
+/*
+ * Add the entry name (namelen bytes) for inode inum to directory pip within
+ * transaction tp.  The version 2 directory code is used when the superblock
+ * has the DIRV2 feature, the original directory code otherwise.  An error
+ * is fatal.
+ */
+static void
+newdirent(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_inode_t     *pip,
+       char            *name,
+       int             namelen,
+       xfs_ino_t       inum,
+       xfs_fsblock_t   *first,
+       xfs_bmap_free_t *flist,
+       xfs_extlen_t    total)
+{
+       int     err;
+
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               err = libxfs_dir_createname(tp, pip, name, namelen,
+                                           inum, first, flist, total);
+       } else {
+               err = libxfs_dir2_createname(tp, pip, name, namelen,
+                                            inum, first, flist, total);
+       }
+       if (err)
+               fail("directory createname error", err);
+}
+
+/*
+ * Initialise dp as a new directory whose parent is pdp, within transaction
+ * tp.  The version 2 directory code is used when the superblock has the
+ * DIRV2 feature, the original directory code otherwise.  An error is fatal.
+ */
+static void
+newdirectory(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       xfs_inode_t     *pdp)
+{
+       int     err;
+
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               err = libxfs_dir_init(tp, dp, pdp);
+       } else {
+               err = libxfs_dir2_init(tp, dp, pdp);
+       }
+       if (err)
+               fail("directory create error", err);
+}
+
+void
+parseproto(                            /* create the object described by the next prototype entry */
+       xfs_mount_t     *mp,            /* filesystem being populated */
+       xfs_inode_t     *pip,           /* parent directory inode; NULL makes a directory entry the root */
+       char            **pp,           /* cursor into the prototype text, advanced as tokens are consumed */
+       char            *name)          /* name for the new entry in pip; not used for the root */
+{
+#define        IF_REGULAR      0       /* '-': regular file copied from a host file */
+#define        IF_RESERVED     1       /* 'r': regular file with preallocated space only */
+#define        IF_BLOCK        2       /* 'b': block device */
+#define        IF_CHAR         3       /* 'c': character device */
+#define        IF_DIRECTORY    4       /* 'd': directory, followed by its entries up to "$" */
+#define        IF_SYMLINK      5       /* 'l': symbolic link */
+#define        IF_FIFO         6       /* 'p': fifo */
+
+       char            *buf;
+       int             committed;
+       int             error;
+       xfs_fsblock_t   first;
+       int             flags;
+       xfs_bmap_free_t flist;
+       int             fmt;
+       int             i;
+       xfs_inode_t     *ip;
+       int             len;
+       long long       llen;
+       int             majdev;
+       int             mindev;
+       int             mode;
+       char            *mstr;
+       xfs_trans_t     *tp;
+       int             val;
+       int             isroot = 0;
+       cred_t          creds;
+       char            *value;
+
+       bzero(&creds, sizeof(creds));
+       mstr = getstr(pp);      /* mode string: type char, 'u' or '-', 'g' or '-', three octal digits */
+       switch (mstr[0]) {
+       case '-':
+               fmt = IF_REGULAR;
+               break;
+       case 'r':
+               fmt = IF_RESERVED;
+               break;
+       case 'b':
+               fmt = IF_BLOCK;
+               break;
+       case 'c':
+               fmt = IF_CHAR;
+               break;
+       case 'd':
+               fmt = IF_DIRECTORY;
+               break;
+       case 'l':
+               fmt = IF_SYMLINK;
+               break;
+       case 'p':
+               fmt = IF_FIFO;
+               break;
+       default:
+               fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+               exit(1);
+       }
+       mode = 0;
+       switch (mstr[1]) {      /* a short string lands in default via its NUL */
+       case '-':
+               break;
+       case 'u':
+               mode |= ISUID;
+               break;
+       default:
+               fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+               exit(1);
+       }
+       switch (mstr[2]) {
+       case '-':
+               break;
+       case 'g':
+               mode |= ISGID;
+               break;
+       default:
+               fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+               exit(1);
+       }
+       val = 0;
+       for (i = 3; i < 6; i++) {       /* three octal permission digits */
+               if (mstr[i] < '0' || mstr[i] > '7') {
+                       fprintf(stderr, "%s: bad format string %s\n",
+                               progname, mstr);
+                       exit(1);
+               }
+               val = val * 8 + mstr[i] - '0';
+       }
+       mode |= val;
+       creds.cr_uid = (int)getnum(pp);
+       creds.cr_gid = (int)getnum(pp);
+       tp = libxfs_trans_alloc(mp, 0);
+       flags = XFS_ILOG_CORE;
+       XFS_BMAP_INIT(&flist, &first);
+       switch (fmt) {
+       case IF_REGULAR:
+               buf = newregfile(pp, &len);     /* next token names the host file to copy */
+               getres(tp, XFS_B_TO_FSB(mp, len));
+               error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+                                       mp->m_dev, &creds, &ip);
+               if (error)
+                       fail("Inode allocation failed", error);
+               flags |= newfile(tp, ip, &flist, &first, 0, 0, buf, len);
+               if (buf)
+                       free(buf);
+               libxfs_trans_ijoin(tp, pip, 0);
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               break;
+
+       case IF_RESERVED:                       /* pre-allocated space only */
+               value = getstr(pp);     /* next token is the size, converted by cvtnum() */
+               llen = cvtnum(mp->m_sb.sb_blocksize, value);
+               getres(tp, XFS_B_TO_FSB(mp, llen));
+
+               error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+                                               mp->m_dev, &creds, &ip);
+               if (error)
+                       fail("Inode pre-allocation failed", error);
+
+               libxfs_trans_ijoin(tp, pip, 0);
+
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               libxfs_trans_log_inode(tp, ip, flags);
+
+               error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+               if (error)
+                       fail("Pre-allocated file creation failed", error);
+               libxfs_trans_commit(tp, 0, NULL);
+               rsvfile(mp, ip, llen);  /* space is reserved after the creating transaction commits */
+               return;
+
+       case IF_BLOCK:
+               getres(tp, 0);
+               majdev = (int)getnum(pp);       /* next two tokens: major and minor numbers */
+               mindev = (int)getnum(pp);
+               error = libxfs_inode_alloc(&tp, pip, mode|IFBLK, 1,
+                               makedev(majdev, mindev), &creds, &ip);
+               if (error) {
+                       fail("Inode allocation failed", error);
+               }
+               libxfs_trans_ijoin(tp, pip, 0);
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               flags |= XFS_ILOG_DEV;
+               break;
+
+       case IF_CHAR:
+               getres(tp, 0);
+               majdev = (int)getnum(pp);       /* next two tokens: major and minor numbers */
+               mindev = (int)getnum(pp);
+               error = libxfs_inode_alloc(&tp, pip, mode|IFCHR, 1,
+                               makedev(majdev, mindev), &creds, &ip);
+               if (error)
+                       fail("Inode allocation failed", error);
+               libxfs_trans_ijoin(tp, pip, 0);
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               flags |= XFS_ILOG_DEV;
+               break;
+
+       case IF_FIFO:
+               getres(tp, 0);
+               error = libxfs_inode_alloc(&tp, pip, mode|IFIFO, 1,
+                               mp->m_dev, &creds, &ip);
+               if (error)
+                       fail("Inode allocation failed", error);
+               libxfs_trans_ijoin(tp, pip, 0);
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               break;
+       case IF_SYMLINK:
+               buf = getstr(pp);       /* next token is the link target; it points into the prototype text */
+               len = (int)strlen(buf);
+               getres(tp, XFS_B_TO_FSB(mp, len));
+               error = libxfs_inode_alloc(&tp, pip, mode|IFLNK, 1,
+                               mp->m_dev, &creds, &ip);
+               if (error)
+                       fail("Inode allocation failed", error);
+               flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len);
+               libxfs_trans_ijoin(tp, pip, 0);
+               i = strlen(name);
+               newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+               libxfs_trans_ihold(tp, pip);
+               break;
+       case IF_DIRECTORY:
+               getres(tp, 0);
+               error = libxfs_inode_alloc(&tp, pip, mode|IFDIR, 1,
+                               mp->m_dev, &creds, &ip);
+               if (error)
+                       fail("Inode allocation failed", error);
+               ip->i_d.di_nlink++;             /* account for . */
+               if (!pip) {     /* no parent: this directory becomes the filesystem root */
+                       pip = ip;
+                       mp->m_sb.sb_rootino = ip->i_ino;
+                       libxfs_mod_sb(tp, XFS_SB_ROOTINO);
+                       mp->m_rootip = ip;
+                       isroot = 1;
+               } else {
+                       libxfs_trans_ijoin(tp, pip, 0);
+                       i = strlen(name);
+                       newdirent(mp, tp, pip, name, i, ip->i_ino,
+                                 &first, &flist, 1);
+                       pip->i_d.di_nlink++;    /* NOTE(review): presumably for the child's ".." entry -- confirm */
+                       libxfs_trans_ihold(tp, pip);
+                       libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+               }
+               newdirectory(mp, tp, ip, pip);
+               libxfs_trans_log_inode(tp, ip, flags);
+               error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+               if (error)
+                       fail("Directory creation failed", error);
+               libxfs_trans_ihold(tp, ip);
+               libxfs_trans_commit(tp, 0, NULL);
+               /*
+                * RT initialization.  Do this here to ensure that
+                * the RT inodes get placed after the root inode.
+                */
+               if (isroot)
+                       rtinit(mp);
+               tp = NULL;
+               for (;;) {      /* children: a name token, then a recursive entry, until "$" */
+                       name = getstr(pp);
+                       if (strcmp(name, "$") == 0)
+                               break;
+                       parseproto(mp, ip, pp, name);
+               }
+               libxfs_iput(ip, 0);
+               return;
+       }
+       libxfs_trans_log_inode(tp, ip, flags);  /* shared tail for every case that ended in break */
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               fail("Error encountered creating file from prototype", error);
+       }
+       libxfs_trans_commit(tp, 0, NULL);
+}
+
+/*
+ * Allocate the rt bitmap and summary inodes, zero them, free all rt extents.
+ */
+static void
+rtinit(
+       xfs_mount_t     *mp)
+{
+       xfs_dfiloff_t   bno;
+       int             committed;
+       xfs_dfiloff_t   ebno;
+       xfs_bmbt_irec_t *ep;
+       int             error;
+       xfs_fsblock_t   first;
+       xfs_bmap_free_t flist;
+       int             i;
+       xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
+       xfs_extlen_t    nsumblocks;
+       int             nmap;
+       xfs_inode_t     *rbmip;
+       xfs_inode_t     *rsumip;
+       xfs_trans_t     *tp;
+       cred_t          creds;
+
+       /*
+        * First, allocate the inodes.
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+       if ((i = libxfs_trans_reserve(tp, MKFS_BLOCKRES_INODE, 0, 0, 0, 0)))
+               res_failed(i);
+       bzero(&creds, sizeof(creds));
+       error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+                               mp->m_dev, &creds, &rbmip);
+       if (error) {
+               fail("Realtime bitmap inode allocation failed", error);
+       }
+       /*
+        * Do our thing with rbmip before allocating rsumip,
+        * because the next call to ialloc() may
+        * commit the transaction in which rbmip was allocated.
+        */
+       mp->m_sb.sb_rbmino = rbmip->i_ino;
+       rbmip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+       rbmip->i_d.di_flags = XFS_DIFLAG_NEWRTBM;
+       *(__uint64_t *)&rbmip->i_d.di_atime = 0; /* zero di_atime, 64 bits wide */
+       libxfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
+       libxfs_mod_sb(tp, XFS_SB_RBMINO);
+       libxfs_trans_ihold(tp, rbmip);
+       mp->m_rbmip = rbmip;
+       error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+                               mp->m_dev, &creds, &rsumip);
+       if (error) {
+               fail("Realtime summary inode allocation failed", error);
+       }
+       mp->m_sb.sb_rsumino = rsumip->i_ino;
+       rsumip->i_d.di_size = mp->m_rsumsize;
+       libxfs_trans_log_inode(tp, rsumip, XFS_ILOG_CORE);
+       libxfs_mod_sb(tp, XFS_SB_RSUMINO);
+       libxfs_trans_ihold(tp, rsumip);
+       libxfs_trans_commit(tp, 0, NULL);
+       mp->m_rsumip = rsumip;
+       /*
+        * Next, give the bitmap file some zero-filled blocks.
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+       if ((i = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks +
+                       (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0)))
+               res_failed(i);
+       libxfs_trans_ijoin(tp, rbmip, 0);
+       bno = 0;
+       XFS_BMAP_INIT(&flist, &first);
+       while (bno < mp->m_sb.sb_rbmblocks) {
+               nmap = XFS_BMAP_MAX_NMAP;
+               error = libxfs_bmapi(tp, rbmip, bno,
+                               (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
+                               XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+                               map, &nmap, &flist);
+               if (error) {
+                       fail("Allocation of the realtime bitmap failed", error);
+               }
+               for (i = 0, ep = map; i < nmap; i++, ep++) {
+                       libxfs_device_zero(mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+                               XFS_FSB_TO_BB(mp, ep->br_blockcount));
+                       bno += ep->br_blockcount;
+               }
+       }
+
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               fail("Allocation of the realtime bitmap failed", error);
+       }
+       libxfs_trans_commit(tp, 0, NULL);
+       /*
+        * Give the summary file some zero-filled blocks.
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+       nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+       if ((i = libxfs_trans_reserve(tp,
+                       nsumblocks + (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1),
+                       0, 0, 0, 0)))
+               res_failed(i);
+       libxfs_trans_ijoin(tp, rsumip, 0);
+       bno = 0;
+       XFS_BMAP_INIT(&flist, &first);
+       while (bno < nsumblocks) {
+               nmap = XFS_BMAP_MAX_NMAP;
+               error = libxfs_bmapi(tp, rsumip, bno,
+                               (xfs_extlen_t)(nsumblocks - bno),
+                               XFS_BMAPI_WRITE, &first, nsumblocks,
+                               map, &nmap, &flist);
+               if (error) {
+                       fail("Allocation of the realtime summary failed", error);
+               }
+               for (i = 0, ep = map; i < nmap; i++, ep++) {
+                       libxfs_device_zero(mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+                               XFS_FSB_TO_BB(mp, ep->br_blockcount));
+                       bno += ep->br_blockcount;
+               }
+       }
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               fail("Allocation of the realtime summary failed", error);
+       }
+       libxfs_trans_commit(tp, 0, NULL);
+       /*
+        * Free the whole area using transactions.
+        * Do one transaction per bitmap block.
+        */
+       for (bno = 0; bno < mp->m_sb.sb_rextents; bno = ebno) {
+               tp = libxfs_trans_alloc(mp, 0);
+               if ((i = libxfs_trans_reserve(tp, 0, 0, 0, 0, 0)))
+                       res_failed(i);
+               XFS_BMAP_INIT(&flist, &first);
+               ebno = XFS_RTMIN(mp->m_sb.sb_rextents,
+                       bno + NBBY * mp->m_sb.sb_blocksize);
+               error = libxfs_rtfree_extent(tp, bno, (xfs_extlen_t)(ebno-bno));
+               if (error) {
+                       fail("Error initializing the realtime bitmap", error);
+               }
+               error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+               if (error) {
+                       fail("Error initializing the realtime bitmap", error);
+               }
+               libxfs_trans_commit(tp, 0, NULL);
+       }
+}
+
+void
+res_failed(
+       int     err)            /* reservation error code; never examined here */
+{
+       fprintf(stderr, "%s: ran out of disk space!\n", progname);
+       ASSERT(0);              /* NOTE(review): presumably traps in debug builds */
+       exit(1);                /* any reservation failure ends the program */
+}
+
+static long
+filesize(
+       int             fd)
+{
+       struct stat64   st;
+
+       if (fstat64(fd, &st) >= 0)
+               return (long)st.st_size;        /* size narrowed to long */
+       return -1;                              /* fstat64 failed */
+}
diff --git a/mkfs/proto.h b/mkfs/proto.h
new file mode 100644 (file)
index 0000000..e588e48
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+char *setup_proto(char *fname);
+void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp, char *name);
+void res_failed(int err);
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
new file mode 100644 (file)
index 0000000..13132b9
--- /dev/null
@@ -0,0 +1,1944 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "xfs_mkfs.h"
+#include "proto.h"
+#include "volume.h"
+#include "maxtrres.h"
+#include "mountinfo.h"
+
+#if HAVE_LIBLVM
+  #include "lvm_user.h"
+
+  char *cmd;           /* Not used. liblvm is broken */
+  int opt_d;           /* Same thing */
+#endif
+
+/*
+ * Prototypes for internal functions.
+ */
+static void conflict(char opt, char *tab[], int oldidx, int newidx);
+static void illegal(char *value, char *opt);
+static void reqval(char opt, char *tab[], int idx);
+static void respec(char opt, char *tab[], int idx);
+static void unknown(char opt, char *s);
+static int  ispow2(unsigned int i);
+static int  max_trans_res(xfs_mount_t *mp);
+
+/*
+ * getsubopt() name tables; each #define is the index of the string after it
+ */
+char   *bopts[] = {
+#define        B_LOG           0
+       "log",
+#define        B_SIZE          1
+       "size",
+       NULL
+};
+
+char   *dopts[] = {
+#define        D_AGCOUNT       0
+       "agcount",
+#define        D_FILE          1
+       "file",
+#define        D_NAME          2
+       "name",
+#define        D_SIZE          3
+       "size",
+#define D_SUNIT                4
+       "sunit",
+#define D_SWIDTH       5
+       "swidth",
+#define D_UNWRITTEN    6
+       "unwritten",
+       NULL
+};
+
+char   *iopts[] = {
+#define        I_ALIGN         0
+       "align",
+#define        I_LOG           1
+       "log",
+#define        I_MAXPCT        2
+       "maxpct",
+#define        I_PERBLOCK      3
+       "perblock",
+#define        I_SIZE          4
+       "size",
+       NULL
+};
+
+char   *lopts[] = {
+#define        L_AGNUM         0
+       "agnum",
+#define        L_INTERNAL      1
+       "internal",
+#define        L_SIZE          2
+       "size",
+#define L_DEV          3
+       "logdev",
+#ifdef MKFS_SIMULATION
+#define        L_FILE          4
+       "file",
+#define        L_NAME          5
+       "name",
+#endif
+       NULL
+};
+
+char   *nopts[] = {
+#define        N_LOG           0
+       "log",
+#define        N_SIZE          1
+       "size",
+#define        N_VERSION       2
+       "version",
+       NULL,
+};
+
+char   *ropts[] = {
+#define        R_EXTSIZE       0
+       "extsize",
+#define        R_SIZE          1
+       "size",
+#define        R_DEV           2
+       "rtdev",
+#ifdef MKFS_SIMULATION
+#define        R_FILE          3
+       "file",
+#define        R_NAME          4
+       "name",
+#endif
+       NULL
+};
+
+/*
+ * max transaction reservation values
+ * version 1:
+ * first dimension log(blocksize) (base XFS_MIN_BLOCKSIZE_LOG)
+ * second dimension log(inodesize) (base XFS_DINODE_MIN_LOG)
+ * version 2:
+ * first dimension log(blocksize) (base XFS_MIN_BLOCKSIZE_LOG)
+ * second dimension log(inodesize) (base XFS_DINODE_MIN_LOG)
+ * third dimension log(dirblocksize) (base XFS_MIN_BLOCKSIZE_LOG)
+ */
+#define        DFL_B   (XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG)
+#define        DFL_I   (XFS_DINODE_MAX_LOG + 1 - XFS_DINODE_MIN_LOG)
+#define        DFL_D   (XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG)
+
+static const int max_trres_v1[DFL_B][DFL_I] = {
+       { MAXTRRES_B9_I8_D9_V1, 0, 0, 0 },
+       { MAXTRRES_B10_I8_D10_V1, MAXTRRES_B10_I9_D10_V1, 0, 0 },
+       { MAXTRRES_B11_I8_D11_V1, MAXTRRES_B11_I9_D11_V1,
+         MAXTRRES_B11_I10_D11_V1, 0 },
+       { MAXTRRES_B12_I8_D12_V1, MAXTRRES_B12_I9_D12_V1,
+         MAXTRRES_B12_I10_D12_V1, MAXTRRES_B12_I11_D12_V1 },
+       { MAXTRRES_B13_I8_D13_V1, MAXTRRES_B13_I9_D13_V1,
+         MAXTRRES_B13_I10_D13_V1, MAXTRRES_B13_I11_D13_V1 },
+       { MAXTRRES_B14_I8_D14_V1, MAXTRRES_B14_I9_D14_V1,
+         MAXTRRES_B14_I10_D14_V1, MAXTRRES_B14_I11_D14_V1 },
+       { MAXTRRES_B15_I8_D15_V1, MAXTRRES_B15_I9_D15_V1,
+         MAXTRRES_B15_I10_D15_V1, MAXTRRES_B15_I11_D15_V1 },
+       { MAXTRRES_B16_I8_D16_V1, MAXTRRES_B16_I9_D16_V1,
+         MAXTRRES_B16_I10_D16_V1, MAXTRRES_B16_I11_D16_V1 },
+};
+
+static const int max_trres_v2[DFL_B][DFL_I][DFL_D] = {
+       { { MAXTRRES_B9_I8_D9_V2, MAXTRRES_B9_I8_D10_V2, MAXTRRES_B9_I8_D11_V2,
+           MAXTRRES_B9_I8_D12_V2, MAXTRRES_B9_I8_D13_V2, MAXTRRES_B9_I8_D14_V2,
+           MAXTRRES_B9_I8_D15_V2, MAXTRRES_B9_I8_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 } },
+       { { 0, MAXTRRES_B10_I8_D10_V2, MAXTRRES_B10_I8_D11_V2,
+           MAXTRRES_B10_I8_D12_V2, MAXTRRES_B10_I8_D13_V2,
+           MAXTRRES_B10_I8_D14_V2, MAXTRRES_B10_I8_D15_V2,
+           MAXTRRES_B10_I8_D16_V2 },
+         { 0, MAXTRRES_B10_I9_D10_V2, MAXTRRES_B10_I9_D11_V2,
+           MAXTRRES_B10_I9_D12_V2, MAXTRRES_B10_I9_D13_V2,
+           MAXTRRES_B10_I9_D14_V2, MAXTRRES_B10_I9_D15_V2,
+           MAXTRRES_B10_I9_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 } },
+       { { 0, 0, MAXTRRES_B11_I8_D11_V2, MAXTRRES_B11_I8_D12_V2,
+           MAXTRRES_B11_I8_D13_V2, MAXTRRES_B11_I8_D14_V2,
+           MAXTRRES_B11_I8_D15_V2, MAXTRRES_B11_I8_D16_V2 },
+         { 0, 0, MAXTRRES_B11_I9_D11_V2, MAXTRRES_B11_I9_D12_V2,
+           MAXTRRES_B11_I9_D13_V2, MAXTRRES_B11_I9_D14_V2,
+           MAXTRRES_B11_I9_D15_V2, MAXTRRES_B11_I9_D16_V2 },
+         { 0, 0, MAXTRRES_B11_I10_D11_V2, MAXTRRES_B11_I10_D12_V2,
+           MAXTRRES_B11_I10_D13_V2, MAXTRRES_B11_I10_D14_V2,
+           MAXTRRES_B11_I10_D15_V2, MAXTRRES_B11_I10_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, 0 } },
+       { { 0, 0, 0, MAXTRRES_B12_I8_D12_V2, MAXTRRES_B12_I8_D13_V2,
+           MAXTRRES_B12_I8_D14_V2, MAXTRRES_B12_I8_D15_V2,
+           MAXTRRES_B12_I8_D16_V2 },
+         { 0, 0, 0, MAXTRRES_B12_I9_D12_V2, MAXTRRES_B12_I9_D13_V2,
+           MAXTRRES_B12_I9_D14_V2, MAXTRRES_B12_I9_D15_V2,
+           MAXTRRES_B12_I9_D16_V2 },
+         { 0, 0, 0, MAXTRRES_B12_I10_D12_V2, MAXTRRES_B12_I10_D13_V2,
+           MAXTRRES_B12_I10_D14_V2, MAXTRRES_B12_I10_D15_V2,
+           MAXTRRES_B12_I10_D16_V2 },
+         { 0, 0, 0, MAXTRRES_B12_I11_D12_V2, MAXTRRES_B12_I11_D13_V2,
+           MAXTRRES_B12_I11_D14_V2, MAXTRRES_B12_I11_D15_V2,
+           MAXTRRES_B12_I11_D16_V2 } },
+       { { 0, 0, 0, 0, MAXTRRES_B13_I8_D13_V2, MAXTRRES_B13_I8_D14_V2,
+           MAXTRRES_B13_I8_D15_V2, MAXTRRES_B13_I8_D16_V2 },
+         { 0, 0, 0, 0, MAXTRRES_B13_I9_D13_V2, MAXTRRES_B13_I9_D14_V2,
+           MAXTRRES_B13_I9_D15_V2, MAXTRRES_B13_I9_D16_V2 },
+         { 0, 0, 0, 0, MAXTRRES_B13_I10_D13_V2, MAXTRRES_B13_I10_D14_V2,
+           MAXTRRES_B13_I10_D15_V2, MAXTRRES_B13_I10_D16_V2 },
+         { 0, 0, 0, 0, MAXTRRES_B13_I11_D13_V2, MAXTRRES_B13_I11_D14_V2,
+           MAXTRRES_B13_I11_D15_V2, MAXTRRES_B13_I11_D16_V2 } },
+       { { 0, 0, 0, 0, 0, MAXTRRES_B14_I8_D14_V2, MAXTRRES_B14_I8_D15_V2,
+           MAXTRRES_B14_I8_D16_V2 },
+         { 0, 0, 0, 0, 0, MAXTRRES_B14_I9_D14_V2, MAXTRRES_B14_I9_D15_V2,
+           MAXTRRES_B14_I9_D16_V2 },
+         { 0, 0, 0, 0, 0, MAXTRRES_B14_I10_D14_V2, MAXTRRES_B14_I10_D15_V2,
+           MAXTRRES_B14_I10_D16_V2 },
+         { 0, 0, 0, 0, 0, MAXTRRES_B14_I11_D14_V2, MAXTRRES_B14_I11_D15_V2,
+           MAXTRRES_B14_I11_D16_V2 } },
+       { { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I8_D15_V2, MAXTRRES_B15_I8_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I9_D15_V2, MAXTRRES_B15_I9_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I10_D15_V2,
+           MAXTRRES_B15_I10_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I11_D15_V2,
+           MAXTRRES_B15_I11_D16_V2 } },
+       { { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I8_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I9_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I10_D16_V2 },
+         { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I11_D16_V2, } },
+};
+
+/*
+ * Pre-superblock stand-in for XFS_DTOBT; needs a variable blocklog in scope
+ */
+#define        DTOBT(d)        ((xfs_drfsbno_t)((d) >> (blocklog - BBSHIFT)))
+
+/*
+ * Block reservations sized for mkfs's conditions (basically no
+ * fragmentation); both macros expect an xfs_mount_t *mp in scope.
+ */
+#define        MKFS_BLOCKRES_INODE     \
+       ((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1)))
+#define        MKFS_BLOCKRES(rb)       \
+       ((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \
+       (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb)))
+
+static void
+get_subvol_stripe_wrapper(char *dfile, int type, int *sunit, int *swidth)
+{      /* only stats dfile unless HAVE_LIBLVM is set; "type" is never used */
+       struct stat64 sb;
+#if HAVE_LIBLVM
+        lv_t *lv;
+       char *vgname;
+#endif
+
+        if (!dfile)     /* no device name given: nothing to probe */
+                return;
+        
+        if (stat64 (dfile, &sb)) {
+                fprintf (stderr, "Could not stat %s\n", dfile);
+               usage();        /* NOTE(review): assumed not to return -- confirm */
+        }
+
+#if HAVE_LIBLVM
+       /* Not an LVM volume (major taken as st_rdev >> 8): leave outputs alone */
+        if (sb.st_rdev >> 8 != LVM_BLK_MAJOR) 
+               return;
+
+       /* Find volume group */
+        if (! (vgname = vg_name_of_lv (dfile))) {
+                fprintf (stderr, "Can't find volume group for %s\n", dfile);
+               usage();
+        }
+
+       /* Logical volume */
+        if (! lvm_tab_lv_check_exist (dfile)) {
+                fprintf (stderr, "Logical volume %s doesn't exist!\n", dfile);
+               usage();
+        }
+
+       /* Get status */
+        if (lv_status_byname (vgname, dfile, &lv) < 0 || lv == NULL) {
+                fprintf (stderr, "Could not get status info from %s\n", dfile);
+               usage();
+        }
+
+       /* Check that data is consistent */
+        if (lv_check_consistency (lv) < 0) {
+                fprintf (stderr, "Logical volume %s is inconsistent\n", dfile);
+               usage();
+        }
+        
+       /* Update sizes: width = number of stripes * stripe size */
+        *sunit = lv->lv_stripesize;
+        *swidth = lv->lv_stripes * lv->lv_stripesize;
+
+#endif /* HAVE_LIBLVM */
+}
+
+
+static int
+get_default_blocksize(void)
+{
+       size_t  pgsz = getpagesize();
+       int     shift;
+
+       /* use the page size if it is a power of two from 4K to 64K */
+       for (shift = 12; shift <= 16; shift++)
+               if (pgsz == (1 << shift))
+                       return pgsz;
+       return (1 << XFS_DFL_BLOCKSIZE_LOG);
+}
+
+
+int
+main(int argc, char **argv)
+{
+       __uint64_t              agcount;
+       xfs_agf_t               *agf;
+       xfs_agi_t               *agi;
+       xfs_agnumber_t          agno;
+       __uint64_t              agsize;
+       xfs_alloc_rec_t         *arec;
+       xfs_btree_sblock_t      *block;
+       int                     blflag;
+       int                     blocklog;
+       int                     blocksize;
+       int                     bsflag;
+       int                     bsize;
+       xfs_buf_t               *buf;
+       int                     c;
+       int                     daflag;
+       xfs_drfsbno_t           dblocks;
+       char                    *dfile;
+       int                     dirblocklog;
+       int                     dirblocksize;
+       int                     dirversion;
+       int                     do_overlap_checks;
+       char                    *dsize;
+       int                     dsunit;
+       int                     dswidth;
+       int                     extent_flagging;
+       int                     force_fs_overwrite;
+       int                     i;
+       int                     iaflag;
+       int                     ilflag;
+       int                     imaxpct;
+       int                     imflag;
+       int                     inodelog;
+       int                     inopblock;
+       int                     ipflag;
+       int                     isflag;
+       int                     isize;
+       int                     laflag;
+       int                     lalign;
+       int                     ldflag;
+       int                     liflag;
+       xfs_agnumber_t          logagno;
+       xfs_drfsbno_t           logblocks;
+       char                    *logfile;
+       int                     loginternal;
+       char                    *logsize;
+       xfs_dfsbno_t            logstart;
+       int                     lsflag;
+       int                     min_logblocks;
+       mnt_check_state_t       *mnt_check_state;
+       int                     mnt_partition_count;
+       xfs_mount_t             *mp;
+       xfs_mount_t             mbuf;
+       xfs_extlen_t            nbmblocks;
+       int                     nlflag;
+       int                     nodsflag;
+       xfs_alloc_rec_t         *nrec;
+       int                     nsflag;
+       int                     nvflag;
+       char                    *p;
+       char                    *protofile;
+       char                    *protostring;
+       int                     qflag;
+       xfs_drfsbno_t           rtblocks;
+       xfs_extlen_t            rtextblocks;
+       xfs_drtbno_t            rtextents;
+       char                    *rtextsize;
+       char                    *rtfile;
+       char                    *rtsize;
+       xfs_sb_t                *sbp;
+       int                     sectlog;
+       __uint64_t              tmp_agsize;
+       uuid_t                  uuid;
+       int                     worst_freelist;
+       libxfs_init_t           xi;
+       int                     xlv_dsunit;
+       int                     xlv_dswidth;
+
+       progname = basename(argv[0]);
+       agcount = 8;
+       blflag = bsflag = 0;
+       blocksize = get_default_blocksize();
+       blocklog = libxfs_highbit32(blocksize);
+       agsize = daflag = dblocks = 0;
+       ilflag = imflag = ipflag = isflag = 0;
+       liflag = laflag = lsflag = ldflag = 0;
+       loginternal = 1;
+       logagno = logblocks = rtblocks = 0;
+       nlflag = nsflag = nvflag = 0;
+       dirblocklog = dirblocksize = dirversion = 0;
+       qflag = 0;
+       imaxpct = inodelog = inopblock = isize = 0;
+       iaflag = XFS_IFLAG_ALIGN;
+       bzero(&xi, sizeof(xi));
+       xi.notvolok = 1;
+       dfile = logfile = rtfile = NULL;
+       dsize = logsize = rtsize = rtextsize = protofile = NULL;
+       opterr = 0;
+       dsunit = dswidth = nodsflag = lalign = 0;
+       do_overlap_checks = 1;
+       extent_flagging = 0;
+       force_fs_overwrite = 0;
+       worst_freelist = 0;
+
+       while ((c = getopt(argc, argv, "b:d:i:l:n:p:qr:CfV")) != EOF) {
+               switch (c) {
+               case 'C':
+                       do_overlap_checks = 0;
+                       break;
+               case 'f':
+                       force_fs_overwrite = 1;
+                       break;
+               case 'b':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)bopts, &value)) {
+                               case B_LOG:
+                                       if (!value)
+                                               reqval('b', bopts, B_LOG);
+                                       if (blflag)
+                                               respec('b', bopts, B_LOG);
+                                       if (bsflag)
+                                               conflict('b', bopts, B_SIZE,
+                                                        B_LOG);
+                                       blocklog = atoi(value);
+                                       if (blocklog <= 0)
+                                               illegal(value, "b log");
+                                       blocksize = 1 << blocklog;
+                                       blflag = 1;
+                                       break;
+                               case B_SIZE:
+                                       if (!value)
+                                               reqval('b', bopts, B_SIZE);
+                                       if (bsflag)
+                                               respec('b', bopts, B_SIZE);
+                                       if (blflag)
+                                               conflict('b', bopts, B_LOG,
+                                                        B_SIZE);
+                                       blocksize = cvtnum(0, value);
+                                       if (blocksize <= 0 ||
+                                           !ispow2(blocksize))
+                                               illegal(value, "b size");
+                                       blocklog = libxfs_highbit32(blocksize);
+                                       bsflag = 1;
+                                       break;
+                               default:
+                                       unknown('b', value);
+                               }
+                       }
+                       break;
+               case 'd':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)dopts, &value)) {
+                               case D_AGCOUNT:
+                                       if (!value)
+                                               reqval('d', dopts, D_AGCOUNT);
+                                       if (daflag)
+                                               respec('d', dopts, D_AGCOUNT);
+                                       agcount = (__uint64_t)atoll(value);
+                                       if ((__int64_t)agcount <= 0)
+                                               illegal(value, "d agcount");
+                                       daflag = 1;
+                                       break;
+                               case D_FILE:
+                                       if (!value)
+                                               value = "1";
+                                       xi.disfile = atoi(value);
+                                       if (xi.disfile < 0 || xi.disfile > 1)
+                                               illegal(value, "d file");
+                                       if (xi.disfile)
+                                               xi.dcreat = 1;
+                                       break;
+                               case D_NAME:
+                                       if (!value)
+                                               reqval('d', dopts, D_NAME);
+                                       if (xi.dname)
+                                               respec('d', dopts, D_NAME);
+                                       xi.dname = value;
+                                       break;
+                               case D_SIZE:
+                                       if (!value)
+                                               reqval('d', dopts, D_SIZE);
+                                       if (dsize)
+                                               respec('d', dopts, D_SIZE);
+                                       dsize = value;
+                                       break;
+                               case D_SUNIT:
+                                       if (!value)
+                                               reqval('d', dopts, D_SUNIT);
+                                       if (dsunit)
+                                               respec('d', dopts, D_SUNIT);
+                                       dsunit = cvtnum(0, value);
+                                       break;
+                               case D_SWIDTH:
+                                       if (!value)
+                                               reqval('d', dopts, D_SWIDTH);
+                                       if (dswidth)
+                                               respec('d', dopts, D_SWIDTH);
+                                       dswidth = cvtnum(0, value);
+                                       break;
+                               case D_UNWRITTEN:
+                                       if (!value)
+                                           reqval('d', dopts, D_UNWRITTEN);
+                                       i = atoi(value);
+                                       if (i < 0 || i > 1)
+                                           illegal(value, "d unwritten");
+                                       extent_flagging = i;
+                                       break;
+                               default:
+                                       unknown('d', value);
+                               }
+                       }
+                       break;
+               case 'i':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)iopts, &value)) {
+                               case I_ALIGN:
+                                       if (!value)
+                                               value = "1";
+                                       iaflag = atoi(value);
+                                       if (iaflag < 0 || iaflag > 1)
+                                               illegal(value, "i align");
+                                       break;
+                               case I_LOG:
+                                       if (!value)
+                                               reqval('i', iopts, I_LOG);
+                                       if (ilflag)
+                                               respec('i', iopts, I_LOG);
+                                       if (ipflag)
+                                               conflict('i', iopts, I_PERBLOCK,
+                                                        I_LOG);
+                                       if (isflag)
+                                               conflict('i', iopts, I_SIZE,
+                                                        I_LOG);
+                                       inodelog = atoi(value);
+                                       if (inodelog <= 0)
+                                               illegal(value, "i log");
+                                       isize = 1 << inodelog;
+                                       ilflag = 1;
+                                       break;
+                               case I_MAXPCT:
+                                       if (!value)
+                                               reqval('i', iopts, I_MAXPCT);
+                                       if (imflag)
+                                               respec('i', iopts, I_MAXPCT);
+                                       imaxpct = atoi(value);
+                                       if (imaxpct < 0 || imaxpct > 100)
+                                               illegal(value, "i maxpct");
+                                       imflag = 1;
+                                       break;
+                               case I_PERBLOCK:
+                                       if (!value)
+                                               reqval('i', iopts, I_PERBLOCK);
+                                       if (ilflag)
+                                               conflict('i', iopts, I_LOG,
+                                                        I_PERBLOCK);
+                                       if (ipflag)
+                                               respec('i', iopts, I_PERBLOCK);
+                                       if (isflag)
+                                               conflict('i', iopts, I_SIZE,
+                                                        I_PERBLOCK);
+                                       inopblock = atoi(value);
+                                       if (inopblock <
+                                               XFS_MIN_INODE_PERBLOCK ||
+                                           !ispow2(inopblock))
+                                               illegal(value, "i perblock");
+                                       ipflag = 1;
+                                       break;
+                               case I_SIZE:
+                                       if (!value)
+                                               reqval('i', iopts, I_SIZE);
+                                       if (ilflag)
+                                               conflict('i', iopts, I_LOG,
+                                                        I_SIZE);
+                                       if (ipflag)
+                                               conflict('i', iopts, I_PERBLOCK,
+                                                        I_SIZE);
+                                       if (isflag)
+                                               respec('i', iopts, I_SIZE);
+                                       isize = cvtnum(0, value);
+                                       if (isize <= 0 || !ispow2(isize))
+                                               illegal(value, "i size");
+                                       inodelog = libxfs_highbit32(isize);
+                                       isflag = 1;
+                                       break;
+                               default:
+                                       unknown('i', value);
+                               }
+                       }
+                       break;
+               case 'l':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)lopts, &value)) {
+                               case L_AGNUM:
+                                       if (laflag)
+                                               respec('l', lopts, L_AGNUM);
+
+                                       if (ldflag) 
+                                               conflict('l', lopts, L_AGNUM, L_DEV);
+
+                                       logagno = atoi(value);
+                                       laflag = 1;
+                                       break;
+                               case L_DEV:
+                                       if (!value) {
+                                               fprintf (stderr, "Must specify log device\n");
+                                               usage();
+                                       }
+
+                                       if (laflag)
+                                               conflict('l', lopts, L_AGNUM, L_DEV);
+
+                                       if (liflag)
+                                               conflict('l', lopts, L_INTERNAL, L_DEV);
+                                       
+                                       ldflag = 1;
+                                       loginternal = 0;
+                                       logfile = value;
+                                       xi.logname = value;
+                                       break;
+#ifdef HAVE_VOLUME_MANAGER
+                               case L_FILE:
+                                       if (!value)
+                                               value = "1";
+                                       if (loginternal)
+                                               conflict('l', lopts, L_INTERNAL,
+                                                        L_FILE);
+                                       xi.lisfile = atoi(value);
+                                       if (xi.lisfile < 0 || xi.lisfile > 1)
+                                               illegal(value, "l file");
+                                       if (xi.lisfile)
+                                               xi.lcreat = 1;
+                                       break;
+#endif
+                               case L_INTERNAL:
+                                       if (!value)
+                                               value = "1";
+
+                                       if (ldflag) 
+                                               conflict('l', lopts, L_INTERNAL, L_DEV);
+#ifdef HAVE_VOLUME_MANAGER
+                                       if (xi.logname)
+                                               conflict('l', lopts, L_NAME,
+                                                        L_INTERNAL);
+                                       if (xi.lisfile)
+                                               conflict('l', lopts, L_FILE,
+                                                        L_INTERNAL);
+#endif
+                                       if (liflag)
+                                               respec('l', lopts, L_INTERNAL);
+                                       loginternal = atoi(value);
+                                       if (loginternal < 0 || loginternal > 1)
+                                               illegal(value, "l internal");
+                                       liflag = 1;
+                                       break;
+#ifdef HAVE_VOLUME_MANAGER
+                               case L_NAME:
+                                       if (!value)
+                                               reqval('l', lopts, L_NAME);
+                                       if (loginternal)
+                                               conflict('l', lopts, L_INTERNAL,
+                                                        L_NAME);
+                                       if (xi.logname)
+                                               respec('l', lopts, L_NAME);
+                                       xi.logname = value;
+                                       break;
+#endif
+                               case L_SIZE:
+                                       if (!value)
+                                               reqval('l', lopts, L_SIZE);
+                                       if (logsize)
+                                               respec('l', lopts, L_SIZE);
+                                       logsize = value;
+                                       lsflag = 1;
+                                       break;
+                               default:
+                                       unknown('l', value);
+                               }
+                       }
+                       break;
+               case 'n':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)nopts, &value)) {
+                               case N_LOG:
+                                       if (!value)
+                                               reqval('n', nopts, N_LOG);
+                                       if (nlflag)
+                                               respec('n', nopts, N_LOG);
+                                       if (nsflag)
+                                               conflict('n', nopts, N_SIZE,
+                                                        N_LOG);
+                                       dirblocklog = atoi(value);
+                                       if (dirblocklog <= 0)
+                                               illegal(value, "n log");
+                                       dirblocksize = 1 << dirblocklog;
+                                       nlflag = 1;
+                                       break;
+                               case N_SIZE:
+                                       if (!value)
+                                               reqval('n', nopts, N_SIZE);
+                                       if (nsflag)
+                                               respec('n', nopts, N_SIZE);
+                                       if (nlflag)
+                                               conflict('n', nopts, N_LOG,
+                                                        N_SIZE);
+                                       dirblocksize = cvtnum(0, value);
+                                       if (dirblocksize <= 0 ||
+                                           !ispow2(dirblocksize))
+                                               illegal(value, "n size");
+                                       dirblocklog =
+                                               libxfs_highbit32(dirblocksize);
+                                       nsflag = 1;
+                                       break;
+                               case N_VERSION:
+                                       if (!value)
+                                               reqval('n', nopts, N_VERSION);
+                                       if (nvflag)
+                                               respec('n', nopts, N_VERSION);
+                                       dirversion = atoi(value);
+                                       if (dirversion < 1 || dirversion > 2)
+                                               illegal(value, "n version");
+                                       nvflag = 1;
+                                       break;
+                               default:
+                                       unknown('n', value);
+                               }
+                       }
+                       break;
+               case 'p':
+                       if (protofile)
+                               respec('p', 0, 0);
+                       protofile = optarg;
+                       break;
+               case 'q':
+                       qflag = 1;
+                       break;
+               case 'r':
+                       p = optarg;
+                       while (*p != '\0') {
+                               char    *value;
+
+                               switch (getsubopt(&p, (constpp)ropts, &value)) {
+                               case R_EXTSIZE:
+                                       if (!value)
+                                               reqval('r', ropts, R_EXTSIZE);
+                                       if (rtextsize)
+                                               respec('r', ropts, R_EXTSIZE);
+                                       rtextsize = value;
+                                       break;
+                               case R_DEV:
+                                       if (!value)
+                                               reqval('r', ropts, R_DEV);
+                                       xi.rtname = value;
+                                       break;
+#ifdef HAVE_VOLUME_MANAGER
+                               case R_FILE:
+                                       if (!value)
+                                               value = "1";
+                                       xi.risfile = atoi(value);
+                                       if (xi.risfile < 0 || xi.risfile > 1)
+                                               illegal(value, "r file");
+                                       if (xi.risfile)
+                                               xi.rcreat = 1;
+                                       break;
+                               case R_NAME:
+                                       if (!value)
+                                               reqval('r', ropts, R_NAME);
+                                       if (xi.rtname)
+                                               respec('r', ropts, R_NAME);
+                                       xi.rtname = value;
+                                       break;
+#endif
+                               case R_SIZE:
+                                       if (!value)
+                                               reqval('r', ropts, R_SIZE);
+                                       if (rtsize)
+                                               respec('r', ropts, R_SIZE);
+                                       rtsize = value;
+                                       break;
+
+                               default:
+                                       unknown('r', value);
+                               }
+                       }
+                       break;
+               case 'V':
+                       printf("%s version %s\n", progname, VERSION);
+                       break;
+               case '?':
+                       unknown(optopt, "");
+               }
+       }
+       if (argc - optind > 1) {
+               fprintf(stderr, "extra arguments\n");
+               usage();
+       } else if (argc - optind == 1) {
+               dfile = xi.volname = argv[optind];
+               if (xi.dname) {
+                       fprintf(stderr,
+                               "cannot specify both %s and -d name=%s\n",
+                               xi.volname, xi.dname);
+                       usage();
+               }
+       } else
+               dfile = xi.dname;
+       /* option post-processing */
+       if (blocksize < XFS_MIN_BLOCKSIZE || blocksize > XFS_MAX_BLOCKSIZE) {
+               fprintf(stderr, "illegal block size %d\n", blocksize);
+               usage();
+       }
+       if (!nvflag)
+               dirversion = (nsflag || nlflag) ? 2 : XFS_DFL_DIR_VERSION;
+       switch (dirversion) {
+       case 1:
+               if ((nsflag || nlflag) && dirblocklog != blocklog) {
+                       fprintf(stderr, "illegal directory block size %d\n",
+                               dirblocksize);
+                       usage();
+               }
+               break;
+       case 2:
+               if (nsflag || nlflag) {
+                       if (dirblocksize < blocksize ||
+                           dirblocksize > XFS_MAX_BLOCKSIZE) {
+                               fprintf(stderr,
+                                       "illegal directory block size %d\n",
+                                       dirblocksize);
+                               usage();
+                       }
+               } else {
+                       if (blocksize < (1 << XFS_MIN_REC_DIRSIZE))
+                               dirblocklog = XFS_MIN_REC_DIRSIZE;
+                       else
+                               dirblocklog = blocklog;
+                       dirblocksize = 1 << dirblocklog;
+               }
+               break;
+       }
+       if (!daflag)
+               agcount = 8;
+
+       if (xi.disfile && (!dsize || !xi.dname)) {
+               fprintf(stderr,
+                       "if -d file then -d name and -d size are required\n");
+               usage();
+       }
+       if (dsize) {
+               __uint64_t dbytes;
+
+               dbytes = cvtnum(blocksize, dsize);
+               if (dbytes % XFS_MIN_BLOCKSIZE) {
+                       fprintf(stderr,
+                       "illegal data length %lld, not a multiple of %d\n",
+                               dbytes, XFS_MIN_BLOCKSIZE);
+                       usage();
+               }
+               dblocks = (xfs_drfsbno_t)(dbytes >> blocklog);
+               if (dbytes % blocksize)
+                       fprintf(stderr,
+       "warning: data length %lld not a multiple of %d, truncated to %lld\n",
+                               dbytes, blocksize, dblocks << blocklog);
+       }
+       if (ipflag) {
+               inodelog = blocklog - libxfs_highbit32(inopblock);
+               isize = 1 << inodelog;
+       } else if (!ilflag && !isflag) {
+               inodelog = XFS_DINODE_DFL_LOG;
+               isize = 1 << inodelog;
+       }
+#ifdef HAVE_VOLUME_MANAGER
+       if (xi.lisfile && (!logsize || !xi.logname)) {
+               fprintf(stderr,
+                       "if -l file then -l name and -l size are required\n");
+               usage();
+       }
+#endif
+       if (logsize) {
+               __uint64_t logbytes;
+
+               logbytes = cvtnum(blocksize, logsize);
+               if (logbytes % XFS_MIN_BLOCKSIZE) {
+                       fprintf(stderr,
+                       "illegal log length %lld, not a multiple of %d\n",
+                               logbytes, XFS_MIN_BLOCKSIZE);
+                       usage();
+               }
+               logblocks = (xfs_drfsbno_t)(logbytes >> blocklog);
+               if (logbytes % blocksize)
+                       fprintf(stderr,
+       "warning: log length %lld not a multiple of %d, truncated to %lld\n",
+                               logbytes, blocksize, logblocks << blocklog);
+       }
+#ifdef HAVE_VOLUME_MANAGER
+       if (xi.risfile && (!rtsize || !xi.rtname)) {
+               fprintf(stderr,
+                       "if -r file then -r name and -r size are required\n");
+               usage();
+       }
+#endif
+       if (rtsize) {
+               __uint64_t rtbytes;
+
+               rtbytes = cvtnum(blocksize, rtsize);
+               if (rtbytes % XFS_MIN_BLOCKSIZE) {
+                       fprintf(stderr,
+                       "illegal rt length %lld, not a multiple of %d\n",
+                               rtbytes, XFS_MIN_BLOCKSIZE);
+                       usage();
+               }
+               rtblocks = (xfs_drfsbno_t)(rtbytes >> blocklog);
+               if (rtbytes % blocksize)
+                       fprintf(stderr,
+       "warning: rt length %lld not a multiple of %d, truncated to %lld\n",
+                               rtbytes, blocksize, rtblocks << blocklog);
+       }
+       /*
+        * If specified, check rt extent size against its constraints.
+        */
+       if (rtextsize) {
+               __uint64_t rtextbytes;
+
+               rtextbytes = cvtnum(blocksize, rtextsize);
+               if (rtextbytes % blocksize) {
+                       fprintf(stderr,
+                       "illegal rt extent size %lld, not a multiple of %d\n",
+                               rtextbytes, blocksize);
+                       usage();
+               }
+               if (rtextbytes > XFS_MAX_RTEXTSIZE) {
+                       fprintf(stderr,
+                               "rt extent size %s too large, maximum %d\n",
+                               rtextsize, XFS_MAX_RTEXTSIZE);
+                       usage();
+               }
+               if (rtextbytes < XFS_MIN_RTEXTSIZE) {
+                       fprintf(stderr,
+                               "rt extent size %s too small, minimum %d\n",
+                               rtextsize, XFS_MIN_RTEXTSIZE);
+                       usage();
+               }
+               rtextblocks = (xfs_extlen_t)(rtextbytes >> blocklog);
+       } else {
+               /*
+                * If realtime extsize has not been specified by the user,
+                * and the underlying volume is striped, then set rtextblocks
+                * to the stripe width.
+                */
+               int dummy1, rswidth;
+               __uint64_t rtextbytes;
+               dummy1 = rswidth = 0;
+                
+                if (!xi.disfile)
+                       get_subvol_stripe_wrapper(dfile, SVTYPE_RT, &dummy1, 
+                                                   &rswidth);
+
+               /* check that rswidth is a multiple of fs blocksize */
+               if (rswidth && !(BBTOB(rswidth) % blocksize)) {
+                       rswidth = DTOBT(rswidth);
+                       rtextbytes = rswidth << blocklog;
+                       if (XFS_MIN_RTEXTSIZE <= rtextbytes &&
+                                (rtextbytes <= XFS_MAX_RTEXTSIZE))  {
+                                        rtextblocks = rswidth;
+                       } else {
+                               rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog;
+                       }
+               } else
+                       rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog;
+       }
+
+       /*
+        * Check some argument sizes against mins, maxes.
+        */
+       if (isize > blocksize / XFS_MIN_INODE_PERBLOCK ||
+           isize < XFS_DINODE_MIN_SIZE ||
+           isize > XFS_DINODE_MAX_SIZE) {
+               int     maxsz;
+
+               fprintf(stderr, "illegal inode size %d\n", isize);
+               maxsz = MIN(blocksize / XFS_MIN_INODE_PERBLOCK,
+                           XFS_DINODE_MAX_SIZE);
+               if (XFS_DINODE_MIN_SIZE == maxsz)
+                       fprintf(stderr,
+                       "allowable inode size with %d byte blocks is %d\n",
+                               blocksize, XFS_DINODE_MIN_SIZE);
+               else
+                       fprintf(stderr,
+       "allowable inode size with %d byte blocks is between %d and %d\n",
+                               blocksize, XFS_DINODE_MIN_SIZE, maxsz);
+               usage();
+       }
+
+       if (dsunit && !dswidth || !dsunit && dswidth) {
+               fprintf(stderr,
+"both sunit and swidth options have to be specified\n");
+               usage();
+       }
+
+       if (dsunit && dswidth % dsunit != 0) {
+               fprintf(stderr,
+"mount: stripe width (%d) has to be a multiple of the stripe unit (%d)\n",
+                       dswidth, dsunit);
+               return 1;
+       }
+
+       /* other global variables */
+       sectlog = 9;            /* i.e. 512 bytes */
+
+       /*
+        * Initialize.  This will open the log and rt devices as well.
+        */
+       if (!libxfs_init(&xi))
+               usage();
+       if (!xi.ddev) {
+               fprintf(stderr, "no device name given in argument list\n");
+               usage();
+       }
+
+       /*
+        * Check whether this partition contains a known filesystem.
+        */
+
+       if (force_fs_overwrite == 0) {
+               char *fstyp;
+               int fsfound = 0;
+
+               fstyp = (char *) mnt_known_fs_type (dfile);
+               
+               if (fstyp != NULL) {
+                       fprintf(stderr, "%s: "
+                       "%s appears to contain an existing filesystem (%s).\n",
+                               progname, dfile, fstyp);
+                       fsfound = 1;
+               }
+
+               if (logfile && *logfile) {
+                       fstyp = (char *) mnt_known_fs_type (logfile);
+                       
+                       if (fstyp != NULL) {
+                               fprintf(stderr, "%s: "
+                       "%s appears to contain an existing filesystem (%s).\n",
+                                       progname, logfile, fstyp);
+                               fsfound = 1;
+                       }
+               }
+
+               if (xi.rtname && *xi.rtname) {
+                       fstyp = (char *) mnt_known_fs_type (xi.rtname);
+                       
+                       if (fstyp != NULL) {
+                               fprintf(stderr, "%s: "
+                       "%s appears to contain an existing filesystem (%s).\n",
+                                       progname, xi.rtname, fstyp);
+                               fsfound = 1;
+                       }
+               }
+
+               if (fsfound) {
+                       fprintf(stderr, "%s: "
+                               "Use the -f option to force overwrite\n",
+                               progname);
+                       exit(1);
+               }
+       }
+
+       if (!xi.disfile && do_overlap_checks) {
+               /*
+                * do partition overlap check
+                * If this is a straight file we assume that it's been created
+                * before the call to mnt_check_init()
+                */
+
+                if (mnt_check_init(&mnt_check_state) == -1) {
+                        fprintf(stderr,
+                               "unable to initialize mount checking "
+                               "routines, bypassing protection checks.\n");
+               } else {
+                       mnt_partition_count = mnt_find_mount_conflicts(
+                               mnt_check_state, dfile);
+
+                       /* 
+                        * ignore -1 return codes, since 3rd party devices
+                        * may not be part of hinv.
+                        */
+                       if (mnt_partition_count > 0) {
+                               if (mnt_causes_test(mnt_check_state, MNT_CAUSE_MOUNTED)) {
+                                       fprintf(stderr, "%s: "
+                                               "%s is already in use.\n",
+                                               progname, dfile);
+                               } else if (mnt_causes_test(mnt_check_state, MNT_CAUSE_OVERLAP)) {
+                                       fprintf(stderr, "%s: "
+                                               "%s overlaps partition(s) "
+                                               "already in use.\n",
+                                               progname, dfile);
+                               } else {
+                                       mnt_causes_show(mnt_check_state, stderr, progname);
+                               }
+                               fprintf(stderr, "\n");
+                               fflush(stderr);
+                               mnt_plist_show(mnt_check_state, stderr, progname);
+                               fprintf(stderr, "\n");
+                       }
+                       mnt_check_end(mnt_check_state);
+                       if (mnt_partition_count > 0) {
+                               usage();
+                       }
+               }
+       }
+
+       if (!liflag && !ldflag)
+               loginternal = xi.logdev == 0;
+       if (xi.logname)
+               logfile = xi.logname;
+       else if (loginternal)
+               logfile = "internal log";
+       else if (xi.volname && xi.logdev)
+               logfile = "volume log";
+       else if (!ldflag) {
+               fprintf(stderr, "no log subvolume or internal log\n");
+               usage();
+       }
+       if (xi.rtname)
+               rtfile = xi.rtname;
+       else
+       if (xi.volname && xi.rtdev)
+               rtfile = "volume rt";
+       else if (!xi.rtdev)
+               rtfile = "none";
+       if (dsize && xi.dsize > 0 && dblocks > DTOBT(xi.dsize)) {
+               fprintf(stderr,
+"size %s specified for data subvolume is too large, maximum is %lld blocks\n",
+                       dsize, DTOBT(xi.dsize));
+               usage();
+       } else if (!dsize && xi.dsize > 0)
+               dblocks = DTOBT(xi.dsize);
+       else if (!dsize) {
+               fprintf(stderr, "can't get size of data subvolume\n");
+               usage();
+       } 
+       if (dblocks < XFS_MIN_DATA_BLOCKS) {
+               fprintf(stderr,
+               "size %lld of data subvolume is too small, minimum %d blocks\n",
+                       dblocks, XFS_MIN_DATA_BLOCKS);
+               usage();
+       }
+       if (xi.logdev && loginternal) {
+               fprintf(stderr, "can't have both external and internal logs\n");
+               usage();
+       }
+       if (dirversion == 1)
+               i = max_trres_v1[blocklog - XFS_MIN_BLOCKSIZE_LOG]
+                               [inodelog - XFS_DINODE_MIN_LOG];
+       else
+               i = max_trres_v2[blocklog - XFS_MIN_BLOCKSIZE_LOG]
+                               [inodelog - XFS_DINODE_MIN_LOG]
+                               [dirblocklog - XFS_MIN_BLOCKSIZE_LOG];
+       min_logblocks = MAX(XFS_MIN_LOG_BLOCKS, i * XFS_MIN_LOG_FACTOR);
+       if (logsize && xi.logBBsize > 0 && logblocks > DTOBT(xi.logBBsize)) {
+               fprintf(stderr,
+"size %s specified for log subvolume is too large, maximum is %lld blocks\n",
+                       logsize, DTOBT(xi.logBBsize));
+               usage();
+       } else if (!logsize && xi.logBBsize > 0)
+               logblocks = DTOBT(xi.logBBsize);
+       else if (logsize && !xi.logdev && !loginternal) {
+               fprintf(stderr,
+                       "size specified for non-existent log subvolume\n");
+               usage();
+       } else if (loginternal && logsize && logblocks >= dblocks) {
+               fprintf(stderr, "size %lld too large for internal log\n",
+                       logblocks);
+               usage();
+       } else if (!loginternal && !xi.logdev)
+               logblocks = 0;
+       else if (loginternal && !logsize)
+               logblocks = MAX(XFS_DFL_LOG_SIZE, i * XFS_DFL_LOG_FACTOR);
+       if (logblocks < min_logblocks) {
+               fprintf(stderr,
+               "log size %lld blocks too small, minimum size is %d blocks\n",
+                       logblocks, min_logblocks);
+               usage();
+       }
+       if (logblocks > XFS_MAX_LOG_BLOCKS) {
+               fprintf(stderr,
+               "log size %lld blocks too large, maximum size is %d blocks\n",
+                       logblocks, XFS_MAX_LOG_BLOCKS);
+               usage();
+       }
+       if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) {
+               fprintf(stderr,
+               "log size %lld bytes too large, maximum size is %d bytes\n",
+                       logblocks << blocklog, XFS_MAX_LOG_BYTES);
+               usage();
+       }
+       if (rtsize && xi.rtsize > 0 && rtblocks > DTOBT(xi.rtsize)) {
+               fprintf(stderr,
+"size %s specified for rt subvolume is too large, maximum is %lld blocks\n",
+                       rtsize, DTOBT(xi.rtsize));
+               usage();
+       } else if (!rtsize && xi.rtsize > 0)
+               rtblocks = DTOBT(xi.rtsize);
+       else if (rtsize && !xi.rtdev) {
+               fprintf(stderr,
+                       "size specified for non-existent rt subvolume\n");
+               usage();
+       }
+       if (xi.rtdev) {
+               rtextents = rtblocks / rtextblocks;
+               nbmblocks = (xfs_extlen_t)howmany(rtextents, NBBY * blocksize);
+       } else {
+               rtextents = rtblocks = 0;
+               nbmblocks = 0;
+       }
+       agsize = dblocks / agcount + (dblocks % agcount != 0);
+
+       /*
+        * If the ag size is too small, complain if agcount was specified,
+        * and fix it otherwise.
+        */
+       if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) {
+               if (daflag) {
+                       fprintf(stderr,
+                               "too many allocation groups for size\n");
+                       fprintf(stderr, "need at most %lld allocation groups\n",
+                               dblocks / XFS_AG_MIN_BLOCKS(blocklog) +
+                               (dblocks % XFS_AG_MIN_BLOCKS(blocklog) != 0));
+                       usage();
+               }
+               agsize = XFS_AG_MIN_BLOCKS(blocklog);
+               if (dblocks < agsize)
+                       agcount = 1;
+               else {
+                       agcount = dblocks / agsize;
+                       agsize = dblocks / agcount + (dblocks % agcount != 0);
+               }
+       }
+       /*
+        * If the ag size is too large, complain if agcount was specified,
+        * and fix it otherwise.
+        */
+       else if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) {
+               if (daflag) {
+                       fprintf(stderr, "too few allocation groups for size\n");
+                       fprintf(stderr,
+                               "need at least %lld allocation groups\n",
+                               dblocks / XFS_AG_MAX_BLOCKS(blocklog) + 
+                               (dblocks % XFS_AG_MAX_BLOCKS(blocklog) != 0));
+                       usage();
+               }
+               agsize = XFS_AG_MAX_BLOCKS(blocklog);
+               agcount = dblocks / agsize + (dblocks % agsize != 0);
+               agsize = dblocks / agcount + (dblocks % agcount != 0);
+       }
+       /*
+        * If agcount was not specified, and agsize is larger than
+        * we'd like, make it the size we want.
+        */
+       if (!daflag && agsize > XFS_AG_BEST_BLOCKS(blocklog)) {
+               agsize = XFS_AG_BEST_BLOCKS(blocklog);
+               agcount = dblocks / agsize + (dblocks % agsize != 0);
+               agsize = dblocks / agcount + (dblocks % agcount != 0);
+       }
+       /*
+        * If agcount is too large, make it smaller.
+        */
+       if (agcount > XFS_MAX_AGNUMBER + 1) {
+               agcount = XFS_MAX_AGNUMBER + 1;
+               agsize = dblocks / agcount + (dblocks % agcount != 0);
+               if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) {
+                       /*
+                        * We're confused.
+                        */
+                       fprintf(stderr, "%s: can't compute agsize/agcount\n",
+                               progname);
+                       exit(1);
+               }
+       }
+
+       xlv_dsunit = xlv_dswidth = 0;
+        if (!xi.disfile)
+               get_subvol_stripe_wrapper(dfile, SVTYPE_DATA, &xlv_dsunit, 
+                               &xlv_dswidth);
+       if (dsunit) {
+
+               if (xlv_dsunit && xlv_dsunit != dsunit) {
+                       fprintf(stderr, "%s: "
+  "Specified data stripe unit %d is not the same as the xlv stripe unit %d\n", 
+                               progname, dsunit, xlv_dsunit);
+                       exit(1);
+               }
+               if (xlv_dswidth && xlv_dswidth != dswidth) {
+                       fprintf(stderr, "%s: "
+"Specified data stripe width (%d) is not the same as the xlv stripe width (%d)\n",
+                               progname, dswidth, xlv_dswidth);
+                       exit(1);
+               }
+       } else {
+               dsunit = xlv_dsunit;
+               dswidth = xlv_dswidth;
+               nodsflag = 1;
+       }
+
+       /*
+        * If dsunit is a multiple of fs blocksize, then check that is a
+        * multiple of the agsize too
+        */
+       if (dsunit && !(BBTOB(dsunit) % blocksize) && 
+           dswidth && !(BBTOB(dswidth) % blocksize)) {
+
+               /* convert from 512 byte blocks to fs blocksize */
+               dsunit = DTOBT(dsunit);
+               dswidth = DTOBT(dswidth);
+
+               /* 
+                * agsize is not a multiple of dsunit
+                */
+               if ((agsize % dsunit) != 0) {
+                       /*
+                        * round up to stripe unit boundary. Also make sure 
+                        * that agsize is still larger than 
+                        * XFS_AG_MIN_BLOCKS(blocklog)
+                        */
+                       tmp_agsize = ((agsize + (dsunit - 1))/ dsunit) * dsunit;
+                       if ((tmp_agsize >= XFS_AG_MIN_BLOCKS(blocklog)) &&
+                           (tmp_agsize <= XFS_AG_MAX_BLOCKS(blocklog)) &&
+                           !daflag) {
+                               agsize = tmp_agsize;
+                               agcount = dblocks/agsize + 
+                                               (dblocks % agsize != 0);
+                       } else {
+                               if (nodsflag)
+                                       dsunit = dswidth = 0;
+                               else { 
+                                       fprintf(stderr,
+"Allocation group size %lld is not a multiple of the stripe unit %d\n",
+                                               agsize, dsunit);
+                                       exit(1);
+                               }
+                       }
+               }
+       } else {
+               if (nodsflag)
+                       dsunit = dswidth = 0;
+               else { 
+                       fprintf(stderr, "%s: "
+"Stripe unit(%d) or stripe width(%d) is not a multiple of the block size(%d)\n",
+                               progname, dsunit, dswidth, blocksize);  
+                       exit(1);
+               }
+       }
+
+       protostring = setup_proto(protofile);
+       bsize = 1 << (blocklog - BBSHIFT);
+       buf = libxfs_getbuf(xi.ddev, XFS_SB_DADDR, 1);
+       mp = &mbuf;
+       sbp = &mp->m_sb;
+       bzero(mp, sizeof(xfs_mount_t));
+       sbp->sb_blocklog = (__uint8_t)blocklog;
+       sbp->sb_agblklog = (__uint8_t)libxfs_log2_roundup((unsigned int)agsize);
+       mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+       if (loginternal) {
+               if (logblocks > agsize - XFS_PREALLOC_BLOCKS(mp)) {
+                       fprintf(stderr,
+       "internal log size %lld too large, must fit in allocation group\n",
+                               logblocks);
+                       usage();
+               }
+               if (laflag) {
+                       if (logagno >= agcount) {
+                               fprintf(stderr,
+                       "log ag number %d too large, must be less than %lld\n",
+                                       logagno, agcount);
+                               usage();
+                       }
+               } else
+                       logagno = (xfs_agnumber_t)(agcount / 2);
+
+               logstart = XFS_AGB_TO_FSB(mp, logagno, XFS_PREALLOC_BLOCKS(mp));
+               /*
+                * Align the logstart at stripe unit boundary.
+                */
+               if (dsunit && ((logstart % dsunit) != 0)) {
+                       logstart = ((logstart + (dsunit - 1))/dsunit) * dsunit;
+
+                       /* 
+                        * Make sure that the log size is a multiple of the
+                        * stripe unit
+                        */
+                       if ((logblocks % dsunit) != 0) 
+                          if (!lsflag) 
+                               logblocks = ((logblocks + (dsunit - 1))
+                                                       /dsunit) * dsunit;
+                          else {
+                               fprintf(stderr,
+       "internal log size %lld is not a multiple of the stripe unit %d\n", 
+                                       logblocks, dsunit);
+                               usage();
+                          }
+
+                       if (logblocks > agsize-XFS_FSB_TO_AGBNO(mp,logstart)) {
+                               fprintf(stderr,
+       "Due to stripe alignment, the internal log size %lld is too large.\n"
+       "Must fit in allocation group\n",
+                                       logblocks);
+                               usage();
+                       }
+                       lalign = 1;
+               }
+       } else
+               logstart = 0;
+       sbp->sb_magicnum = XFS_SB_MAGIC;
+       sbp->sb_blocksize = blocksize;
+       sbp->sb_dblocks = dblocks;
+       sbp->sb_rblocks = rtblocks;
+       sbp->sb_rextents = rtextents;
+       uuid_generate(uuid);
+       uuid_copy(sbp->sb_uuid, uuid);
+       sbp->sb_logstart = logstart;
+       sbp->sb_rootino = sbp->sb_rbmino = sbp->sb_rsumino = NULLFSINO;
+       sbp->sb_rextsize = rtextblocks;
+       sbp->sb_agblocks = (xfs_agblock_t)agsize;
+       sbp->sb_agcount = (xfs_agnumber_t)agcount;
+       sbp->sb_rbmblocks = nbmblocks;
+       sbp->sb_logblocks = (xfs_extlen_t)logblocks;
+       sbp->sb_sectsize = 1 << sectlog;
+       sbp->sb_inodesize = (__uint16_t)isize;
+       sbp->sb_inopblock = (__uint16_t)(blocksize / isize);
+       sbp->sb_sectlog = (__uint8_t)sectlog;
+       sbp->sb_inodelog = (__uint8_t)inodelog;
+       sbp->sb_inopblog = (__uint8_t)(blocklog - inodelog);
+       sbp->sb_rextslog =
+               (__uint8_t)(rtextents ?
+                       libxfs_highbit32((unsigned int)rtextents) : 0);
+       sbp->sb_inprogress = 1; /* mkfs is in progress */
+       sbp->sb_imax_pct = imflag ? imaxpct : XFS_DFL_IMAXIMUM_PCT;
+       sbp->sb_icount = 0;
+       sbp->sb_ifree = 0;
+       sbp->sb_fdblocks = dblocks - agcount * XFS_PREALLOC_BLOCKS(mp) -
+               (loginternal ? logblocks : 0);
+       sbp->sb_frextents = 0;  /* will do a free later */
+       sbp->sb_uquotino = sbp->sb_pquotino = 0;
+       sbp->sb_qflags = 0;
+       sbp->sb_unit = dsunit;
+       sbp->sb_width = dswidth;
+       if (dirversion == 2)
+               sbp->sb_dirblklog = dirblocklog - blocklog;
+       if (iaflag) {
+               sbp->sb_inoalignmt = XFS_INODE_BIG_CLUSTER_SIZE >> blocklog;
+               iaflag = sbp->sb_inoalignmt != 0;
+       } else
+               sbp->sb_inoalignmt = 0;
+       sbp->sb_versionnum =
+               XFS_SB_VERSION_MKFS(iaflag, dsunit != 0, extent_flagging,
+                       dirversion == 2);
+
+       bzero(XFS_BUF_PTR(buf), BBSIZE);
+       libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT,
+                       XFS_SB_ALL_BITS);
+       libxfs_writebuf(buf, 1);
+
+       if (!qflag)
+               printf(
+                  "meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
+                  "data     =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n"
+                  "         =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n"
+                  "naming   =version %-14d bsize=%-6d\n"
+                  "log      =%-22s bsize=%-6d blocks=%lld\n"
+                  "realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n",
+                       dfile, isize, agcount, agsize,
+                       "", blocksize, dblocks, sbp->sb_imax_pct,
+                       "", dsunit, dswidth, extent_flagging,
+                       dirversion, dirversion == 1 ? blocksize : dirblocksize,
+                       logfile, 1 << blocklog, logblocks,
+                       rtfile, rtextblocks << blocklog, rtblocks, rtextents);
+       /*
+        * If the data area is a file, then grow it out to its final size
+        * so that the reads for the end of the device in the mount code
+        * will succeed.
+        */
+       if (xi.disfile && ftruncate64(xi.dfd, dblocks * blocksize) < 0) {
+               fprintf(stderr, "%s: Growing the data section file failed\n",
+                       progname);
+               exit(1);
+       }
+       /*
+        * Zero the log if there is one.
+        */
+       if (loginternal)
+               xi.logdev = xi.ddev;
+       if (xi.logdev)
+               libxfs_log_clear(
+                    xi.logdev, 
+                    XFS_FSB_TO_DADDR(mp, logstart),
+                   (xfs_extlen_t)XFS_FSB_TO_BB(mp, logblocks),
+                    &sbp->sb_uuid,
+                    XLOG_FMT);
+
+       mp = libxfs_mount(mp, sbp, xi.ddev, xi.logdev, xi.rtdev, 1);
+       if (!mp) {
+               fprintf(stderr, "%s: mount initialization failed\n", progname);
+               exit(1);
+       }
+       if (xi.logdev &&
+           XFS_FSB_TO_B(mp, logblocks) <
+           XFS_MIN_LOG_FACTOR * max_trans_res(mp)) {
+               fprintf(stderr, "%s: log size (%lld) is too small for "
+                               "transaction reservations\n",
+                       progname, logblocks);
+               exit(1);
+       }
+
+       for (agno = 0; agno < agcount; agno++) {
+               /*
+                * Superblock.
+                */
+               buf = libxfs_getbuf(xi.ddev,
+                               XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1);
+               bzero(XFS_BUF_PTR(buf), BBSIZE);
+                libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT,
+                               XFS_SB_ALL_BITS);
+               libxfs_writebuf(buf, 1);
+
+               /*
+                * AG header block: freespace
+                */
+               buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1);
+               agf = XFS_BUF_TO_AGF(buf);
+               bzero(agf, BBSIZE);
+               if (agno == agcount - 1)
+                       agsize = dblocks - (xfs_drfsbno_t)(agno * agsize);
+               INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+               INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+               INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
+               INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t)agsize);
+               INT_SET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT,
+                               XFS_BNO_BLOCK(mp));
+               INT_SET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT,
+                               XFS_CNT_BLOCK(mp));
+               INT_SET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT, 1);
+               INT_SET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT, 1);
+               INT_SET(agf->agf_flfirst, ARCH_CONVERT, 0);
+               INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1);
+               INT_SET(agf->agf_flcount, ARCH_CONVERT, 0);
+               nbmblocks = (xfs_extlen_t)(agsize - XFS_PREALLOC_BLOCKS(mp));
+               INT_SET(agf->agf_freeblks, ARCH_CONVERT, nbmblocks);
+               INT_SET(agf->agf_longest, ARCH_CONVERT, nbmblocks);
+               if (loginternal && agno == logagno) {
+                       INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -logblocks);
+                       INT_SET(agf->agf_longest, ARCH_CONVERT, agsize - 
+                               XFS_FSB_TO_AGBNO(mp, logstart) - logblocks);
+               }
+               if (XFS_MIN_FREELIST(agf, mp) > worst_freelist)
+                       worst_freelist = XFS_MIN_FREELIST(agf, mp);
+               libxfs_writebuf(buf, 1);
+
+               /*
+                * AG header block: inodes
+                */
+               buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1);
+               agi = XFS_BUF_TO_AGI(buf);
+               bzero(agi, BBSIZE);
+               INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+               INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+               INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
+               INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t)agsize);
+               INT_SET(agi->agi_count, ARCH_CONVERT, 0);
+               INT_SET(agi->agi_root, ARCH_CONVERT, XFS_IBT_BLOCK(mp));
+               INT_SET(agi->agi_level, ARCH_CONVERT, 1);
+               INT_SET(agi->agi_freecount, ARCH_CONVERT, 0);
+               INT_SET(agi->agi_newino, ARCH_CONVERT, NULLAGINO);
+               INT_SET(agi->agi_dirino, ARCH_CONVERT, NULLAGINO);
+               for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+                       INT_SET(agi->agi_unlinked[i], ARCH_CONVERT, NULLAGINO);
+               libxfs_writebuf(buf, 1);
+
+               /*
+                * BNO btree root block
+                */
+               buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
+                               bsize);
+               block = XFS_BUF_TO_SBLOCK(buf);
+               bzero(block, blocksize);
+               INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC);
+               INT_SET(block->bb_level, ARCH_CONVERT, 0);
+               INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+               INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1));
+               INT_SET(arec->ar_startblock, ARCH_CONVERT,
+                       XFS_PREALLOC_BLOCKS(mp));
+               if (loginternal && agno == logagno) {
+                       if (lalign) {
+                               /*
+                                * Have to insert two records
+                                */
+                               INT_SET(arec->ar_blockcount, ARCH_CONVERT, 
+                                       (xfs_extlen_t)(XFS_FSB_TO_AGBNO(
+                                               mp, logstart)
+                                       - (INT_GET(arec->ar_startblock,
+                                               ARCH_CONVERT))));
+                               nrec = arec + 1;
+                               INT_SET(nrec->ar_startblock, ARCH_CONVERT,
+                                       INT_GET(arec->ar_startblock,
+                                               ARCH_CONVERT) +
+                                       INT_GET(arec->ar_blockcount,
+                                               ARCH_CONVERT));
+                               arec = nrec;
+                               INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
+                       } 
+                       INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
+               } 
+               INT_SET(arec->ar_blockcount, ARCH_CONVERT,
+                       (xfs_extlen_t)(agsize -
+                               INT_GET(arec->ar_startblock, ARCH_CONVERT)));
+               libxfs_writebuf(buf, 1);
+
+               /*
+                * CNT btree root block
+                */
+               buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
+                               bsize);
+               block = XFS_BUF_TO_SBLOCK(buf);
+               bzero(block, blocksize);
+               INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC);
+               INT_SET(block->bb_level, ARCH_CONVERT, 0);
+               INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+               INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1));
+               INT_SET(arec->ar_startblock, ARCH_CONVERT,
+                       XFS_PREALLOC_BLOCKS(mp));
+               if (loginternal && agno == logagno) {
+                       if (lalign) {
+                               INT_SET(arec->ar_blockcount, ARCH_CONVERT,
+                                   (xfs_extlen_t)( XFS_FSB_TO_AGBNO(
+                                       mp, logstart) - (INT_GET(
+                                       arec->ar_startblock, ARCH_CONVERT)) )
+                               );
+                               nrec = arec + 1;
+                               INT_SET(nrec->ar_startblock, ARCH_CONVERT,
+                                   INT_GET(arec->ar_startblock, ARCH_CONVERT) +
+                                   INT_GET(arec->ar_blockcount, ARCH_CONVERT));
+                               arec = nrec;
+                               INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1);
+                       }
+                       INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks);
+               }       
+               INT_SET(arec->ar_blockcount, ARCH_CONVERT, (xfs_extlen_t)
+                       (agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT)));
+               libxfs_writebuf(buf, 1);
+               /*
+                * INO btree root block
+                */
+               buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
+                               bsize);
+               block = XFS_BUF_TO_SBLOCK(buf);
+               bzero(block, blocksize);
+               INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+               INT_SET(block->bb_level, ARCH_CONVERT, 0);
+               INT_SET(block->bb_numrecs, ARCH_CONVERT, 0);
+               INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               libxfs_writebuf(buf, 1);
+       }
+
+       /*
+        * Touch last block, make fs the right size if it's a file.
+        */
+       buf = libxfs_getbuf(mp->m_dev,
+               (xfs_daddr_t)XFS_FSB_TO_BB(mp, dblocks - 1LL), bsize);
+       bzero(XFS_BUF_PTR(buf), blocksize);
+       libxfs_writebuf(buf, 1);
+
+       /*
+        * Make sure we can write the last block in the realtime area.
+        */
+       if (mp->m_rtdev && rtblocks > 0) {
+               buf = libxfs_getbuf(mp->m_rtdev,
+                               XFS_FSB_TO_BB(mp, rtblocks - 1LL), bsize);
+               bzero(XFS_BUF_PTR(buf), blocksize);
+               libxfs_writebuf(buf, 1);
+       }
+       /*
+        * BNO, CNT free block list
+        */
+       for (agno = 0; agno < agcount; agno++) {
+               xfs_alloc_arg_t args;
+               xfs_trans_t     *tp;
+
+               bzero(&args, sizeof(args));
+               args.tp = tp = libxfs_trans_alloc(mp, 0);
+               args.mp = mp;
+               args.agno = agno;
+               args.alignment = 1;
+               args.minalignslop = UINT_MAX;
+               args.pag = &mp->m_perag[agno];
+               if (i = libxfs_trans_reserve(tp, worst_freelist, 0, 0, 0, 0))
+                       res_failed(i);
+               libxfs_alloc_fix_freelist(&args, 0);
+               libxfs_trans_commit(tp, 0, NULL);
+       }
+       /*
+        * Allocate the root inode and anything else in the proto file.
+        */
+       mp->m_rootip = NULL;
+       parseproto(mp, NULL, &protostring, NULL);
+
+       /*
+        * protect ourselves against possible stupidity
+        */
+       if (XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino) != 0) {
+               fprintf(stderr, "%s: root inode not created in AG 0, "
+                               "created in AG %u",
+                       progname, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino));
+               exit(1);
+       }
+
+       /*
+        * write out multiple copies of superblocks with the rootinode field set
+        */
+       if (mp->m_sb.sb_agcount > 1) {
+               /*
+                * the last superblock
+                */
+               buf = libxfs_readbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, mp->m_sb.sb_agcount-1,
+                                       XFS_SB_DADDR),
+                               BTOBB(mp->m_sb.sb_sectsize), 1);
+               INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino,
+                               ARCH_CONVERT, mp->m_sb.sb_rootino);
+               libxfs_writebuf(buf, 1);
+               /*
+                * and one in the middle for luck
+                */
+               if (mp->m_sb.sb_agcount > 2) {
+                       buf = libxfs_readbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, (mp->m_sb.sb_agcount-1)/2,
+                                       XFS_SB_DADDR),
+                               BTOBB(mp->m_sb.sb_sectsize), 1);
+                       INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino,
+                               ARCH_CONVERT, mp->m_sb.sb_rootino);
+                       libxfs_writebuf(buf, 1);
+               }
+       }
+
+       /*
+        * Mark the filesystem ok.
+        */
+       buf = libxfs_getsb(mp, 1);
+       (XFS_BUF_TO_SBP(buf))->sb_inprogress = 0;
+       libxfs_writebuf(buf, 1);
+
+       libxfs_umount(mp);
+       if (xi.rtdev)
+               libxfs_device_close(xi.rtdev);
+       if (xi.logdev && xi.logdev != xi.ddev)
+               libxfs_device_close(xi.logdev);
+       libxfs_device_close(xi.ddev);
+
+       return 0;
+}
+
+/* Complain that two suboptions of -opt exclude each other; usage() exits. */
+static void
+conflict(char opt, char *tab[], int oldidx, int newidx)
+{
+       char    *first = tab[oldidx];
+       char    *second = tab[newidx];
+
+       fprintf(stderr, "Cannot specify both -%c %s and -%c %s\n",
+               opt, first, opt, second);
+       usage();
+}
+
+
+/* Reject value as an argument to option -opt, then bail out via usage(). */
+static void
+illegal(char *value, char *opt)
+{
+       fprintf(stderr,
+               "Illegal value %s for -%s option\n", value, opt);
+       usage();
+}
+
+/* Nonzero iff i is a power of two; zero itself is rejected. */
+static int
+ispow2(unsigned int i)
+{
+       return i != 0 && (i & (i - 1)) == 0;
+}
+
+/* Report that suboption tab[idx] of -opt lacks its value; usage() exits. */
+static void
+reqval(char opt, char *tab[], int idx)
+{
+       char    *subopt = tab[idx];
+
+       fprintf(stderr, "-%c %s option requires a value\n", opt, subopt);
+       usage();
+}
+
+/* Report that -opt (with suboption tab[idx], if tab is set) was repeated. */
+static void
+respec(char opt, char *tab[], int idx)
+{
+       fprintf(stderr, "-%c ", opt);
+       /* tab may be NULL; then only the option letter is named */
+       if (tab != NULL) {
+               fprintf(stderr, "%s ", tab[idx]);
+       }
+       fputs("option respecified\n", stderr);
+       usage();
+}
+
+/* Report that "-opt s" is not a known option, then exit through usage(). */
+static void
+unknown(char opt, char *s)
+{
+       fprintf(stderr,
+               "unknown option -%c %s\n", opt, s);
+       usage();
+}
+
+/*
+ * Return the largest value found when mp->m_reservations is scanned as a
+ * flat array of uint; 0 if no entry is positive once cast to int.
+ */
+static int
+max_trans_res(xfs_mount_t *mp)
+{
+       uint    *cur = (uint *)&mp->m_reservations;
+       uint    *end = (uint *)(&mp->m_reservations + 1);
+       int     largest = 0;
+
+       for (; cur < end; cur++)
+               if ((int)*cur > largest)
+                       largest = (int)*cur;
+       return largest;
+}
+
+/*
+ * Parse s as a number in any base strtoll() accepts with base 0, followed
+ * by at most one unit suffix.  Returns the scaled value, or -1 if s does
+ * not have that form.
+ */
+long long
+cvtnum(int blocksize, char *s)
+{
+       extern void     usage(void);
+       char            *end;
+       long long       val = strtoll(s, &end, 0);
+
+       if (end == s && val == 0)
+               return -1LL;
+       if (end[0] == '\0')
+               return val;
+       if (end[1] != '\0')
+               return -1LL;
+       switch (end[0]) {
+       case 'b':                       /* units of blocksize bytes */
+               if (blocksize)
+                       return val * blocksize;
+               fprintf(stderr, "blocksize not available yet.\n");
+               usage();
+               break;
+       case 'k':       return 1024LL * val;
+       case 'm':       return 1024LL * 1024LL * val;
+       case 'g':       return 1024LL * 1024LL * 1024LL * val;
+       }
+       return -1LL;
+}
+
+/* Print usage to stderr and exit(1); the C-comment labels are output too. */
+void
+usage(void)
+{
+       fprintf(stderr, "Usage: %s\n\
+/* blocksize */                [-b log=n|size=num]\n\
+/* data subvol */      [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\
+                           sunit=value,swidth=value,unwritten=0|1]\n\
+/* inode size */       [-i log=n|perblock=n|size=num,maxpct=n]\n\
+/* log subvol */       [-l agnum=n,internal,size=num,logdev=xxx]\n\
+/* naming */           [-n log=n|size=num|version=n]\n\
+/* prototype file */   [-p fname]\n\
+/* quiet */            [-q]\n\
+/* version */          [-V]\n\
+/* realtime subvol */  [-r extsize=num,size=num,rtdev=xxx]\n\
+                       devicename\n\
+devicename is required unless -d name=xxx is given\n\
+internal 1000 block log is default unless overridden or using a volume \
+manager with log\n\
+num is xxx (bytes), xxxb (blocks), xxxk (KB), xxxm (MB) or xxxg (GB)\n\
+value is xxx (512 byte blocks)\n", progname);
+       exit(1);
+}
diff --git a/mkfs/xfs_mkfs.h b/mkfs/xfs_mkfs.h
new file mode 100644 (file)
index 0000000..6dcd004
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_MKFS_H__
+#define        __XFS_MKFS_H__
+
+#define        XFS_DFL_BLOCKSIZE_LOG   12              /* log2: 4096 byte blocks */
+#define        XFS_DINODE_DFL_LOG      8               /* log2: 256 byte inodes */
+#define        XFS_MIN_DATA_BLOCKS     100             /* presumably fs blocks - confirm */
+#define        XFS_MIN_INODE_PERBLOCK  2               /* min inodes per block */
+#define        XFS_DFL_IMAXIMUM_PCT    25              /* max % of space for inodes */
+#define        XFS_IFLAG_ALIGN         1               /* -i align defaults on */
+#define        XFS_MIN_REC_DIRSIZE     12              /* log2: 4096 byte dirblocks (V2) */
+#define        XFS_DFL_DIR_VERSION     2               /* default directory version */
+#define        XFS_DFL_LOG_SIZE        1000            /* default log size, blocks */
+#define        XFS_MIN_LOG_FACTOR      3               /* min log bytes, x max trans res */
+#define        XFS_DFL_LOG_FACTOR      16              /* default log size, factor */
+                                               /* with max trans reservation */
+extern void  usage (void);                     /* prints synopsis, exits 1 */
+extern long long  cvtnum (int blocksize, char *s);     /* -1 if s is malformed */
+
+#endif /* __XFS_MKFS_H__ */
diff --git a/repair/Makefile b/repair/Makefile
new file mode 100644 (file)
index 0000000..96f81d7
--- /dev/null
@@ -0,0 +1,72 @@
+#
+# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+# 
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# 
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# 
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+# 
+# http://www.sgi.com 
+# 
+# For further information regarding this notice, see: 
+# 
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+TOPDIR = ..
+include $(TOPDIR)/include/builddefs
+
+CMDTARGET = xfs_repair
+CMDDEPS = $(LIBXFS)
+
+HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
+       dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \
+       scan.h versions.h
+
+CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
+       dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \
+       incore_bmc.c init.c incore_ext.c incore_ino.c io.c phase1.c \
+       phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \
+       scan.c versions.c xfs_repair.c
+
+LLDLIBS = $(LIBXFS) $(LIBUUID)
+
+default: $(CMDTARGET)
+
+include $(BUILDRULES)
+
+# Optional debugging defines; none of them is added to CFLAGS below.
+# Tracing flags:
+# -DXR_BMAP_DBG                incore block bitmap debugging
+# -DXR_INODE_TRACE     inode processing
+# -DXR_BMAP_TRACE      bmap btree processing
+# -DXR_DIR_TRACE       directory processing
+# -DXR_DUP_TRACE       duplicate extent processing
+# -DXR_BCNT_TRACE      incore bcnt freespace btree building
+# -DXR_BLD_FREE_TRACE  building on-disk freespace (bcnt/bno) btrees
+# -DXR_BLD_INO_TRACE   building on-disk inode allocation btrees
+# -DXR_BLD_ADD_EXTENT  track phase 5 block extent creation
+# -DXR_BCKPTR_DBG      parent list debugging info
+# Only the two AVL defines are passed unconditionally:
+CFLAGS += -DAVL_USER_MODE -DAVL_FUTURE_ENHANCEMENTS
+
+install: default
+       $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR)
+       $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR)
diff --git a/repair/README b/repair/README
new file mode 100644 (file)
index 0000000..c007af9
--- /dev/null
@@ -0,0 +1,718 @@
+A living document.  The basic algorithm.
+
+TODO: (D == DONE)
+
+0)     Need to bring some sanity into the case of flags that can
+       be set in the secondaries at mkfs time but reset or cleared
+       in the primary later in the filesystem's life.
+
+0)     Clear the persistent read-only bit if set.  Clear the
+       shared bit if set and the version number is zero.  This
+       brings the filesystem back to a known state.
+
+0)     make sure that superblock geometry code checks the logstart
+       value against whether or not we have an internal log.
+       If we have an internal log and a logdev, that's ok.
+       (Maybe we just aren't using it).  If we have an external
+       log (logstart == 0) but no logdev, that's right out.
+
+0)     write secondary superblock search code.  Rewrite initial
+       superblock parsing code to be less complicated.  Just
+       use variables to indicate primary, secondary, etc.,
+       and use a function to get the SB given a specific location
+       or something.
+
+2)     For inode alignment, if the SB bit is set and the
+       inode alignment size field in the SB is set, then
+       believe that the fs inodes MUST be aligned and
+       disallow any non-aligned inodes.  Likewise, if
+       the SB bit isn't set (or earlier version) and
+       the inode alignment size field is zero, then
+       never set the bit even if the inodes are aligned.
+       Note that the bits and alignment values are
+       replicated in the secondary superblocks.
+
+0)  add feature specification options to parse_arguments
+
+0)     add logic to add_inode_ref(), add_inode_reached()
+       to detect nlink overflows in cases where the fs
+       (or user had indicated fs) doesn't support new nlinks.
+
+6) check to make sure that the inodes containing btree blocks
+       with # recs < minrecs aren't legit -- e.g. the only
+       descendant of a root block.
+
+7)  inode di_size value sanity checking -- should always be less than
+       the biggest filebno offset mentioned in the bmaps.  Doesn't
+       have to be equal though since we're allowed to overallocate
+       (it just wastes a little space).  This is for both regular
+       files and directories (have to modify the existing directory
+       check).
+
+       Add tracking of largest offset in bmap scanning code.  Compare
+       value against di_size.  Should be >= di_size.
+
+       Alternatively, you could pass the inode down through
+       the extent record processing layer and make the checks
+       there.
+
+       Add knowledge of quota inodes.  size of quota inode is
+       always zero.  We should maintain that.
+
+8)  Basic quota stuff.
+
+       Invariants
+               if quota feature bit is set, the quota inodes
+               if set, should point to disconnected, 0 len inodes.
+
+D -            if quota inodes exist, the quota bits must be
+               turned on.  It's ok for the quota flags to be
+               zeroed but they should be in a legal state
+               (see xfs_quota.h).
+
+D -            if the quota flags are non-zero, the corresponding
+               quota inodes must exist.
+
+               quota inodes are never deleted, only their space
+               is freed.
+
+       if quotas are being downgraded, then check quota inodes
+       at the end of phase 3.  If they haven't been cleared yet,
+       clear them.  Regardless, then clear sb flags (quota inode
+       fields, quota flags, and quota bit).
+
+
+5) look at verify_inode_chunk().  it's probably really broken.
+
+
+9)  Complicated quota stuff.  Add code to bmap scan code to
+       track used blocks.  Add another pair of AVL trees
+       to track user and project quota limits.  Set AVL
+       trees up at the beginning of phase 3.  Quota inodes
+       can be rebuilt or corrected later if damaged.
+
+
+D - 0) fix directory processing.  phase 3, if an entry references
+       a free inode, *don't* mark it used.  wait for the rest of
+       phase 3 processing to hit that inode.  If it looks like it's
+       in use, we'll mark in use then.  If not, we'll clear it and
+       mark the inode map.  then in phase 4, you can depend on the
+       inode map.  should probably set the parent info in phase 4.
+       So we have a check_dups flag.  Maybe we should change the
+       name of check_dir to discover_inodes.  During phase 3
+       (discover_inodes == 1), uncertain inodes are added to list.
+       During phase 4 (discover_inodes == 0), they aren't.  And
+       we never mark inodes in use from the directory code.
+       During phase 4, we shouldn't complain about names with
+       a leading '/' since we made those names in phase 3.
+
+       Have to change dino_chunks.c (parent setting), dinode.c
+       and dir.c.
+
+D - 0) make sure we don't screw up filesystems with real-time inodes.
+       remember to initialize real-time map with all blocks XR_E_FREE.
+
+D - 4) check contents of symlinks as well as lengths in process_symlinks()
+       in dinode.c.  Right now, we only check lengths.
+
+
+D - 1) Feature mismatches -- for quotas and attributes,
+       if the stuff exists in the filesystem, set the
+       superblock version bits.
+
+D - 0) rewrite directory leaf block holemap comparison code.
+       probably should just check the leaf block hole info
+       against our incore bitmap.  If the hole flag is not
+       set, then we know that there can only be one hole and
+       it has to be between the entry table and the top of heap.
+       If the hole flag is set, then it's ok if the on-disk
+       holemap doesn't describe everything as long as what
+       it does describe doesn't conflict with reality.
+
+D - 0) rewrite setting nlinks handling -- for version 1
+       inodes, set both nlinks and onlinks (zero projid
+       and pad) if we have to change anything.  For
+       version 2, I think we're ok.
+
+D - 0) Put awareness of quota inode into mark_standalone_inodes.
+
+
+D - 8) redo handling of superblocks with bad version numbers.  need
+       to bail out (without harming) fs's that have sbs that
+       are newer than we are.
+
+D - 0)  How do we handle feature mismatches between fs and
+       superblock?  For nlink, check each inode after you
+       know it's good.  If onlinks is 0 and nlinks is > 0
+       and it's a version 2 inode, then it really is a version
+       2 inode and the nlinks flag in the SB needs to be set.
+       If it's a version 2 inode and the SB agrees but onlink
+       is non-zero, then clear onlink.
+
+D - 3)  keep cumulative counts of freeblocks, inodes, etc. to set in
+       the superblock at the end of phase 5.  Remember that
+       agf freeblock counters don't include blocks used by
+       the non-root levels of the freespace trees but that
+       the sb free block counters include those.
+
+D - 0)  Do parent setting in directory code (called by phase 3).
+       actually, I put it in process_inode_set and propagated
+       the parent up to it from the process_dinode/process_dir
+       routines.  seemed cleaner than pushing the irec down
+       and letting them bang on it.
+
+D - 0)  If we clear a file in phase 4, make sure that if it's
+       a directory that the parent info is cleared also.
+
+D - 0) put inode tree flashover (call to add_ino_backptrs) into phase 5.
+
+D - 0) do set/get_inode_parent functions in incore_ino.c.
+       also do is/set/ inode_processed.
+       
+D - 0) do a versions.c to extract feature info and set global vars
+       from the superblock version number and possibly feature bits
+
+D - 0) change longform_dir_entry_check + shortform_dir_entry_check
+       to return a count of how many illegal '/' entries exist.
+       if > 0, then process_dirstack needs to call prune_dir_entry
+       with a hash value of 0 to delete the entries.
+
+D - 0)  add the "processed" bitfield
+       to the backptrs_t struct that gets attached after
+       phase 4.
+
+D- )  Phase 6 !!!
+
+D - 0) look at usage of XFS_MAKE_IPTR().  It does the right
+       arithmetic assuming you count your offsets from the
+       beginning of the buffer.
+
+
+D - 0) look at references to XFS_INODES_PER_CHUNK.  change the
+       ones that really mean sizeof(__uint64_t)*NBBY to
+       something else (like that, only defined as a constant
+       INOS_PER_IREC).  this isn't as important since
+       XFS_INODES_PER_CHUNK will never change.
+
+
+D - 0) look at junk_zerolen_dir_leaf_entries() to make sure it isn't hosing
+       the freemap since it assumed that bytes between the
+       end of the table and firstused didn't show up in the
+       freemap when they actually do.
+
+D - 0) track down XFS_INO_TO_OFFSET() usage.  I don't think I'm
+       using it right.  (e.g. I think
+       it gives you the offset of an inode into a block but
+       on small block filesystems, I may be reading in inodes
+       in multiblock buffers and working from the start of
+       the buffer plus I'm using it to get offsets into
+       my ino_rec's which may not be a good idea since I
+       use 64-inode ino_rec's whereas the offset macro
+       works off blocksize).
+
+D - 0.0) put buffer -> dirblock conversion macros into xfs kernel code
+
+D - 0.2) put in sibling pointer checking and path fixup into
+       bmap (long form) scan routines in scan.c
+D - 0.3) find out if bmap btrees with only root blocks are legal.  I'm
+       betting that they're not because they'd be extent inodes
+       instead.  If that's the case, rip some code out of
+       process_btinode()
+
+
+Algorithm (XXX means not done yet):
+
+Phase 1 -- get a superblock and zero log
+
+       get a superblock -- either read in primary or
+               find a secondary (ag header), check ag headers
+
+               To find secondary:
+
+                       Go for brute force and read in the filesystem N meg
+                               at a time looking for a superblock.  as a
+                               slight optimization, we could maybe skip
+                               ahead some number of blocks to try and get
+                               towards the end of the first ag.
+
+                       After you find a secondary, try and find at least
+                               other ags as a verification that the
+                               secondary is a good superblock.
+
+XXX -          Ugh.  Have to take growfs'ed filesystems into account.
+               The root superblock geometry info may not be right if
+               recovery hasn't run or it's been trashed.  The old ag's
+               may or may not be right since the system could have crashed
+               during growfs or the bwrite() to the superblocks could have
+               failed and the buffer been reused.  So we need to check
+               to see if another ag exists beyond the "last" ag
+               to see if a growfs happened.  If not, then we know that
+               the geometry info is good and treat the fs as a non-growfs'ed
+               fs.  If we do have inconsistencies, then the smaller geometry
+               is the old fs and the larger the new.  We can check the
+               new superblocks to see if they're good.  If not, then we
+               know the system crashed at or soon after the growfs and
+               we can choose to either accept the new geometry info or
+               trash it and truncate the fs back to the old geometry
+               parameters.
+
+       Cross-check geometry information in secondary sb's with
+       primary to ensure that it's correct.
+
+       Use sim code to allow mounting filesystems *without* reading
+       in root inode.  This sets up the xfs_mount_t structure
+       and allows us to use XFS_* macros that we wouldn't
+       otherwise be able to use.
+
+       Note, I split phase 1 and 2 into separate pieces because I want
+       to initialize the xfs_repair incore data structures after phase 1.
+
+       parse superblock version and feature flags and set appropriate
+               global vars to reflect the flags (attributes, quotas, etc.)
+
+       Workaround for the mkfs "not zeroing the superblock buffer" bug.
+       Determine what field is the last valid non-zero field in
+       the superblock.  The trick here is to be able to differentiate
+       the last valid non-zero field in the primary superblock and
+       secondaries because they may not be the same.  Fields in
+       the primary can be set as the filesystem gets upgraded but
+       the upgrades won't touch the secondaries.  This means that
+       we need to find some number of secondaries and check them.
+       So we do the checking here and the setting in phase2.
+
+Phase 2 -- check integrity of allocation group allocation structures
+
+       zero the log if in no modify mode
+
+       sanity check ag headers -- superblocks match, agi isn't
+                               trashed -- the agf and agfl
+                               don't really matter because we can
+                               just recreate them later.
+
+               Zero part of the superblock buffer if necessary
+
+               Walk the freeblock trees to get an
+                       initial idea of what the fs thinks is free.
+                       Files that disagree (claim free'd blocks)
+                       can be salvaged or deleted.  If the btree is
+                       internally inconsistent, when in doubt, mark
+                       blocks free.  If they're used, they'll be stolen
+                       back later.  don't have to check sibling pointers
+                       for each level since we're going to regenerate
+                       all the trees anyway.
+               Walk the inode allocation trees and
+                       make sure they're ok, otherwise the sim
+                       inode routines will probably just barf.
+                       mark inode allocation tree blocks and ag header
+                       blocks as used blocks.  If the trees are
+                       corrupted, this phase will generate "uncertain"
+                       inode chunks.  Those chunks go on a list and
+                       will have to be verified later.  Record the blocks
+                       that are used to detect corruption and multiply
+                       claimed blocks.  These trees will be regenerated
+                       later.  Mark the blocks containing inodes referenced
+                       by uncorrupted inode trees as being used by inodes.
+                       The other blocks will get marked when/if the inodes
+                       are verified.
+
+       calculate root and realtime inode numbers from the
+               filesystem geometry, fix up mount structure's
+               incore superblock if they're wrong.
+
+ASSUMPTION:  at end of phase 2, we've got superblocks and ag headers
+       that are not garbage (some data in them like counters and the
+       freeblock and inode trees may be inconsistent but the header
+       is readable and otherwise makes sense).
+
+XXX    if in no_modify mode, check for blocks claimed by one freespace
+       btree and not the other
+       
+Phase 3 -- traverse inodes to make the inodes, bmaps and freespace maps
+               consistent.  For each ag, use either the incore inode map or
+               scan the ag for inodes.
+               Let's use the incore inode map, now that we've made one
+               up in phase2.  If we lose the maps, we'll locate inodes
+               when we traverse the directory hierarchy.  If we lose both,
+               we could scan the disk.  Ugh.  Maybe make that a command-line
+               option that we support later.
+               
+       ASSUMPTION: we know if the ag allocation btrees are intact (phase 2)
+
+       First - Walk and clear the ag unlinked lists.  We'll process
+               the inodes later.  Check and make sure that the unlinked
+               lists reference known inodes.  If not, add to the list
+               of uncertain inodes.
+
+       Second, check the uncertain inode list generated in phase2 and
+               above and get them into the inode tree if they're good.
+               The incore inode cluster tree *always* has good
+               clusters (alignment, etc.) in it.
+               
+       Third, make sure that the root inode is known.  If not,
+               and we know the inode number from the superblock,
+               discover that inode and its chunk.
+
+       Then, walk the incore inode-cluster tree.
+
+       Maintain an in-core bitmap over the entire fs for block allocation.
+
+       traverse each inode, make sure inode mode field matches free/allocated
+               bit in the incore inode allocation tree.  If there's a mismatch,
+               assume that the inode is in use.
+
+               - for each in-use inode, traverse each bmap/dir/attribute
+                       map or tree.  Maintain a map (extent list?) for the
+                       current inode.
+
+               - For each block marked as used, check to see if already known
+                       (referenced by another file or directory) and sanity
+                       check the contents of the block as well if possible
+                       (in the case of meta-blocks).
+
+               - if the inode claims already used blocks, mark the blocks
+                       as multiply claimed (duplicate) and go on.  the inode
+                       will be cleared in phase 4.
+
+               - if metablocks are garbaged, clear the inode after
+                       traversing what you can of the bmap and
+                       proceed to next inode.  We don't have to worry
+                       about trashing the maps or trees in cleared inodes
+                       because the blocks will show up as free in the
+                       ag freespace trees that we set up in phase 5.
+
+               - clear the di_next_unlinked pointer -- all unlinked
+                       but active files go bye-bye.
+
+               - All blocks start out unknown.  We need the last state
+                       in case we run into a case where we need to step
+                       on a block to store filesystem meta-data and it
+                       turns out later that it's referenced by some inode's
+                       bmap.  In that case, the inode loses because we've
+                       already trashed the block.  This shouldn't happen
+                       in the first version unless some inode has a bogus
+                       bmap referencing blocks in the ag header but the
+                       4th state will keep us from inadvertently doing
+                       something stupid in that case.
+
+               - If inode is allocated, mark all blocks allocated to the
+                       current inode as allocated in the incore freespace
+                       bitmap.
+
+               - If inode is good and a directory, scan through it to
+                       find leaf entries and discover any unknown inodes.
+                       
+                       For shortform, we correct what we can.
+
+                       If the directory is corrupt, we try and fix it in
+                       place.  If it has zero good entries, then we blast it.
+
+                       All unknown inodes get put onto the uncertain inode
+                       list.  This is safe because we only put inodes onto
+                       the list when we're processing known inodes so the
+                       uncertain inode list isn't in use.
+
+                       We fix only one problem -- an entry that has
+                       a mathematically invalid inode number in it.
+                       If that's the case, we replace the inode number
+                       with NULLFSINO and we'll fix up the entry in
+                       phase 6.
+
+                       That info may conflict with the inode information,
+                       but we'll straighten out any inconsistencies there
+                       in phase4 when we process the inodes again.
+
+                       Errors involving bogus forward/back links,
+                       zero-length entries make the directory get
+                       trashed.
+
+                       if an entry references a free inode, ignore that
+                       fact for now.  wait for the rest of phase 3
+                       processing to hit that inode.  If it looks like it's
+                       in use, we'll mark in use then.  If not, we'll
+                       clear it and mark the inode map.  then in phase
+                       4, you can depend on the inode map.
+       
+                       Entries that point to non-existent or free
+                       inodes, and extra blocks in the directory
+                       will get fixed in place in a later pass.
+
+                       Entries that point to a quota inode are
+                       marked TBD.
+
+                       If the directory internally points to the same
+                       block twice, the directory gets blown away.
+
+       Note that processing uncertain inodes can add more inodes
+       to the uncertain list if they're directories.  So we loop
+       until the uncertain list is empty.
+
+       During inode verification, if the inode blocks are unknown,
+       mark them as in-use by inodes.
+
+XXX    HEURISTIC -- if we blow an inode away that has space,
+       assume that the freespace btree is now out of whack.
+       If it was ok earlier, it's certain to be wrong now.
+       And the odds of this space free cancelling out the
+       existing error is so small I'm willing to ignore it.
+       Should probably do this via a global var and complain
+       about this later.
+
+Assumption:  All known inodes are now marked as in-use or free.  Any
+       inodes that we haven't found by now are hosed (lost) since
+       we can't reach them via either the inode btrees or via directory
+       entries.
+
+       Directories are semi-clean.  All '.' entries are good.
+       Root '..' entry is good if root inode exists.  All entries
+       referencing non-existent inodes, free inodes, etc. 
+
+XXX    verify that either quota inode is 0 or NULLFSINO or
+       if sb quota flag is non zero, verify that quota inode
+       is NULLFSINO or is referencing a used, but disconnected
+       inode.
+
+XXX    if in no_modify mode, check for unclaimed blocks
+
+- Phase 4 - Check for inodes referencing duplicate blocks
+
+       At this point, all known duplicate blocks are marked in
+       the block map.  However, some of the claimed blocks in
+       the bmap may in fact be free because they belong to inodes
+       that have to be cleared either due to being a trashed
+       directory or because it's the first inode to claim a
+       block that was then claimed later.  There's a similar
+       problem with meta-data blocks that are referenced by
+       inode bmaps that are going to be freed once the inode
+       (or directory) gets cleared.
+
+       So at this point, we collect the duplicate blocks into
+       extents and put them into the duplicate extent list.
+
+       Mark the ag header blocks as in use.
+
+       We then process each inode twice -- the first time
+       we check to see if the inode claims a duplicate extent
+       and we do NOT set the block bitmap.  If the inode claims
+       a duplicate extent, we clear the inode.  Since the bitmap
+       hasn't been set, that automatically frees all blocks associated
+       with the cleared inode.  If the inode is ok, process it a second
+       time and set the bitmap since we know that this inode will live.
+
+       The unlinked list gets cleared in every inode at this point as
+       well.  We no longer need to preserve it since we've discovered
+       every inode we're going to find from it.
+
+       verify existence of root inode.  if it exists, check for
+       existence of "lost+found".  If it exists, mark the entry
+       to be deleted, and clear the inode.  All the inodes that
+       were connected to the lost+found will be reconnected later.
+
+XXX    HEURISTIC -- if we blow an inode away that has space,
+       assume that the freespace btree is now out of whack.
+       If it was ok earlier, it's certain to be wrong now.
+       And the odds of this space free cancelling out the
+       existing error is so small I'm willing to ignore it.
+       Should probably do this via a global var and complain
+       about this later.
+
+       Clear the quota inodes if the inode btree says that
+       they're not in use.  The space freed will get picked
+       up by phase 5.
+       
+XXX    Clear the quota inodes if the filesystem is being downgraded.
+
+- Phase 5 - Build inode allocation trees, freespace trees and
+               agfl's for each ag.  After this, we should be able to
+               unmount the filesystem and remount it for real.
+
+       For each ag: (if not in no_modify mode)
+
+       scan bitmap first to figure out number of extents.
+       
+       calculate space required for all trees.  Start with inode trees.
+       Setup the btree cursor which includes the list of preallocated
+       blocks.  As a by-product, this will delete the extents required
+       for the inode tree from the incore extent tree.
+       
+       Calculate how many extents will be required to represent the
+       remaining free extent tree on disk (twice, one for bybno and
+       one for bycnt).  You have to iterate on this because consuming
+       extents can alter the number of blocks required to represent
+       the remaining extents.  If there's slop left over, you can
+       put it in the agfl though.
+
+       Then, manually build the trees, agi, agfs, and agfls.
+
+XXX    if in no_modify mode, scan the on-disk inode allocation
+       trees and compare against the incore versions.  Don't have
+       to scan the freespace trees because we caught the problems
+       there in phase2 and phase3.  But if we cleared any inodes
+       with space during phases 3 or 4, now is the time to complain.
+
+XXX -  Free duplicate extent lists. ???
+
+Assumptions:  at this point, sim code having to do with inode
+               creation/modification/deletion and space allocation
+               work because the inode maps, space maps, and bmaps
+               for all files in the filesystem are good.  The only
+               structures that are screwed up are the directory contents,
+               which means that lookup may not work for beans, the
+               root inode which exists but may be completely bogus and
+               the link counts on all inodes which may also be bogus.
+
+       Free the bitmap, the freespace tree.
+
+       Flash the incore inode tree over from parent list to having
+       full backpointers.
+
+       realtime processing, if any --
+
+               (Skip to below if running in no_modify mode).
+
+               Generate the realtime bitmap from the incore realtime
+               extent map and slam the info into the realtime bitmap
+               inode.  Generate summary info from the realtime extent map.
+               
+XXX            if in no_modify mode, compare contents of realtime bitmap
+               inode to the incore realtime extent map.  generate the
+               summary info from the incore realtime extent map.
+               compare against the contents of the realtime summary inode.
+               complain if bad.
+
+       reset superblock counters, sync version numbers
+
+- Phase 6 - directory traversal -- check reference counts,
+               attach disconnected inodes, fix up bogus directories
+
+       Assumptions:  all on-disk space and inode trees are structurally
+               sound.  Incore and on-disk inode trees agree on whether
+               an inode is in use.
+
+               Directories are structurally sound.  All hashvalues
+               are monotonically increasing and interior nodes are
+               correct so lookups work.  All legal directory entries
+               point to inodes that are in use and exist.  Shortform
+               directories are fine except that the links haven't been
+               checked for conflicts (cycles, ".." being correct, etc.).
+               Longform directories haven't been checked for those problems
+               either PLUS longform directories may still contain
+               entries beginning with '/'.  No zero-length entries
+               exist (they've been deleted or converted to '/').
+
+               Root directory may or may not exist.  orphanage may
+               or may not exist.  Contents of either may be completely
+               bogus.
+
+               Entries may point to free or non-existent inodes.
+
+       At this point, we may need new incore structures and
+               may be able to trash an old one (like the filesystem
+               block map)
+
+       If '/' is trashed, then reinitialize it.
+
+       If no realtime inodes, make them and if necessary, slam the
+               summary info into the realtime summary
+               inode.  Ditto with the realtime bitmap inode.
+       
+       Make orphanage (lost+found ???).
+
+       Traverse each directory from '/' (unless it was created).
+               Check directory structure and each directory entry.
+               If the entry is bogus (points to a non-existent or
+               free inode, for example), mark that entry TBD.  Maintain
+               link counts on all inodes.  Currently, traversal is
+               depth-first.
+
+               Mark every inode reached as "reached" (includes
+               bumping up link counts).
+
+               If an entry points to a directory but the parent (..)
+               disagrees, then blow away the entry.  if the directory
+               being pointed to winds up disconnected, it'll be moved
+               to the orphanage (and the link count incremented to
+               account for the link and the reached bit set then).
+
+               If an entry points to a directory that we've already
+               reached, then some entry is bad and should be blown
+               away.  It's easiest to blow away the current entry
+               plus since presumably the parent entry in the
+               reached directory points to another directory,
+               then it's far more likely that the current
+               entry is bogus (otherwise the parent should point
+               at it).
+
+               If an entry points to a non-existent or free inode,
+               blow the entry away.
+
+               Every time a good entry is encountered update the
+               link count for the inode that the entry points to.
+
+       After traversal, scan incore inode map for directories not
+               reached.  Go to first one and try and find its root
+               by following .. entries.  Once at root, run traversal
+               algorithm.  When algorithm terminates, move subtree
+               root inode to the orphanage.  Repeat as necessary
+               until all disconnected directories are attached.
+
+       Move all disconnected inodes to orphanage.
+
+- Phase 7:  reset reference counts if required.
+
+       Now traverse the on-disk inodes again, and make sure on-disk
+               reference counts are correct.  Reset if necessary.
+
+               SKIP all unused inodes -- that also makes us
+               skip the orphanage inode which we think is
+               unused but is really used.  However, the ref counts
+               on that should be right so that's ok.
+
+---
+
+multiple TB xfs_repair
+
+modify above to work in a couple of AGs at a time.  The bitmaps
+should span only the current set of AGs.
+
+The key is to scan the inode bmaps and keep a list of inodes
+that span multiple AG sets and keep the list in a data structure
+that's keyed off AG set # as well as inode # and also has a bit
+to indicate whether or not the inode will be cleared.
+
+Then in each AG set, when doing duplicate extent processing,
+you have to process all multi-AG-set inodes that claim blocks in
+the current AG set.  If there's a conflict, you clear the
+inode in the current AG and you mark the multi-AG inode as
+"to be cleared".
+
+After going through all AGs, you can clear the to-be-cleared
+multi-AG-set inodes and pull them off the list.
+
+When building up the AG freespace trees, you walk the bmaps
+of all multi-AG-set inodes that are in the AG-set and include
+blocks claimed in the AG by the inode as used.
+
+This probably involves adding a phase 3-0 which would have to
+check all the inodes to see which ones are multi-AG-set inodes
+and set up the multi-AG-set inode data structure.  Plus the
+process_dinode routines may have to be altered just a bit
+to do the right thing if running in tera-byte mode (call
+out to routines that check the multi-AG-set inodes when
+appropriate).
+
+To make things go faster, phase 3-0 could probably run
+in parallel.  It should be possible to run phases 2-5
+in parallel as well once the appropriate synchronization
+is added to the incore routines and the static directory
+leaf block bitmap is changed to be on the stack.
+
+Phase 7 probably can be in parallel as well.
+
+By in parallel, I mean that assuming that an AG-set
+contains 4 AGs, you could run 4 threads, 1 per AG
+in parallel to process the AG set.
+
+I don't see how phase 6 can be run in parallel though.
+
+And running Phase 8 in parallel is just silly.
+
diff --git a/repair/agheader.c b/repair/agheader.c
new file mode 100644 (file)
index 0000000..0a4200f
--- /dev/null
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * verify_set_agf - sanity check the header fields of one AGF.
+ *
+ * mp  - mount structure; supplies sb_agblocks, sb_agcount and sb_dblocks
+ * agf - AGF header to check (fixed up in place unless no_modify is set)
+ * i   - number of the AG this header belongs to
+ *
+ * The magic number, version, sequence number and length are checked;
+ * each bad field is reported with do_warn() and makes the routine
+ * return XR_AG_AGF.  Out-of-range flfirst/fllast values are reported
+ * (and zeroed unless no_modify is set) as well, but they do not
+ * change the return value.
+ * Returns 0 if none of the first four fields was bad.
+ */
+int
+verify_set_agf(xfs_mount_t *mp, xfs_agf_t *agf, xfs_agnumber_t i)
+{
+       xfs_drfsbno_t last_ag_len;
+       int status = 0;
+
+       /* magic number */
+       if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC)  {
+               do_warn("bad magic # 0x%x for agf %d\n", INT_GET(agf->agf_magicnum, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+               status = XR_AG_AGF;
+       }
+
+       /* version number */
+       if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT)))  {
+               do_warn("bad version # %d for agf %d\n",
+                       INT_GET(agf->agf_versionnum, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+               status = XR_AG_AGF;
+       }
+
+       /* sequence number has to match the AG we are looking at */
+       if (INT_GET(agf->agf_seqno, ARCH_CONVERT) != i)  {
+               do_warn("bad sequence # %d for agf %d\n", INT_GET(agf->agf_seqno, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agf->agf_seqno, ARCH_CONVERT, i);
+               status = XR_AG_AGF;
+       }
+
+       /*
+        * length: every AG but the last must be exactly sb_agblocks
+        * long, the last one gets whatever is left of sb_dblocks.
+        */
+       if (INT_GET(agf->agf_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks)  {
+               if (i == mp->m_sb.sb_agcount - 1)  {
+                       last_ag_len = mp->m_sb.sb_dblocks -
+                               (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+
+                       if (INT_GET(agf->agf_length, ARCH_CONVERT) != last_ag_len)  {
+                               do_warn(
+                       "bad length %d for agf %d, should be %llu\n",
+                                       INT_GET(agf->agf_length, ARCH_CONVERT), i, last_ag_len);
+                               if (!no_modify)
+                                       INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t) last_ag_len);
+                               status = XR_AG_AGF;
+                       }
+               } else  {
+                       do_warn("bad length %d for agf %d, should be %d\n",
+                               INT_GET(agf->agf_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks);
+                       if (!no_modify)
+                               INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+                       status = XR_AG_AGF;
+               }
+       }
+
+       /*
+        * first/last AGFL indices.  if need be, lose the free
+        * space in the AGFL, we'll reclaim it later.
+        */
+       if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+               do_warn("flfirst %d in agf %d too large (max = %d)\n",
+                       INT_GET(agf->agf_flfirst, ARCH_CONVERT), i, XFS_AGFL_SIZE);
+               if (!no_modify)
+                       INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+       }
+
+       if (INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+               do_warn("fllast %d in agf %d too large (max = %d)\n",
+                       INT_GET(agf->agf_fllast, ARCH_CONVERT), i, XFS_AGFL_SIZE);
+               if (!no_modify)
+                       INT_ZERO(agf->agf_fllast, ARCH_CONVERT);
+       }
+
+       /* the freespace btrees are not looked at here -- the caller checks them */
+
+       return status;
+}
+
+/*
+ * verify_set_agi - sanity check the header fields of one AGI.
+ *
+ * mp  - mount structure; supplies sb_agblocks, sb_agcount and sb_dblocks
+ * agi - AGI header to check (fixed up in place unless no_modify is set)
+ * i   - number of the AG this header belongs to
+ *
+ * The magic number, version, sequence number and length are checked;
+ * each bad field is reported with do_warn() and makes the routine
+ * return XR_AG_AGI.  Returns 0 if all four fields were good.
+ */
+int
+verify_set_agi(xfs_mount_t *mp, xfs_agi_t *agi, xfs_agnumber_t i)
+{
+       xfs_drfsbno_t last_ag_len;
+       int status = 0;
+
+       /* magic number */
+       if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC)  {
+               do_warn("bad magic # 0x%x for agi %d\n", INT_GET(agi->agi_magicnum, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+               status = XR_AG_AGI;
+       }
+
+       /* version number */
+       if (!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT)))  {
+               do_warn("bad version # %d for agi %d\n",
+                       INT_GET(agi->agi_versionnum, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+               status = XR_AG_AGI;
+       }
+
+       /* sequence number has to match the AG we are looking at */
+       if (INT_GET(agi->agi_seqno, ARCH_CONVERT) != i)  {
+               do_warn("bad sequence # %d for agi %d\n", INT_GET(agi->agi_seqno, ARCH_CONVERT), i);
+               if (!no_modify)
+                       INT_SET(agi->agi_seqno, ARCH_CONVERT, i);
+               status = XR_AG_AGI;
+       }
+
+       /*
+        * length: every AG but the last must be exactly sb_agblocks
+        * long, the last one gets whatever is left of sb_dblocks.
+        */
+       if (INT_GET(agi->agi_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks)  {
+               if (i == mp->m_sb.sb_agcount - 1)  {
+                       last_ag_len = mp->m_sb.sb_dblocks -
+                               (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+
+                       if (INT_GET(agi->agi_length, ARCH_CONVERT) != last_ag_len)  {
+                               do_warn(
+                       "bad length # %d for agi %d, should be %llu\n",
+                                       INT_GET(agi->agi_length, ARCH_CONVERT), i, last_ag_len);
+                               if (!no_modify)
+                                       INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t) last_ag_len);
+                               status = XR_AG_AGI;
+                       }
+               } else  {
+                       do_warn("bad length # %d for agi %d, should be %d\n",
+                               INT_GET(agi->agi_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks);
+                       if (!no_modify)
+                               INT_SET(agi->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+                       status = XR_AG_AGI;
+               }
+       }
+
+       /* the inode btree is not looked at here -- the caller checks it */
+
+       return status;
+}
+
+/*
+ * superblock comparison - compare an arbitrary superblock with
+ *                     the filesystem mount-point superblock
+ *
+ * Both superblocks are reduced to an fs_geometry_t with
+ * get_sb_geometry() and the leading part of the two structures,
+ * i.e. every member that precedes sb_shared_vn, is compared bytewise.
+ * The verified fields include id and geometry.
+ *
+ * the inprogress fields, version numbers, and counters
+ * are allowed to differ as well as all fields after the
+ * counters to cope with the pre-6.5 mkfs non-bzeroed
+ * secondary superblock sectors.
+ *
+ * Returns XR_OK if the compared parts are identical and
+ * XR_SB_GEO_MISMATCH otherwise.
+ */
+
+int
+compare_sb(xfs_mount_t *mp, xfs_sb_t *sb)
+{
+       fs_geometry_t mount_geo, other_geo;
+       size_t checked_len;
+
+       get_sb_geometry(&mount_geo, &mp->m_sb);
+       get_sb_geometry(&other_geo, sb);
+
+       /* number of bytes in front of the sb_shared_vn member */
+       checked_len = (char *) &mount_geo.sb_shared_vn - (char *) &mount_geo;
+
+       if (memcmp(&mount_geo, &other_geo, checked_len) != 0)
+               return XR_SB_GEO_MISMATCH;
+
+       return XR_OK;
+}
+
+/*
+ * secondary_sb_wack - check (and unless no_modify is set, reset) the
+ * fields of a secondary superblock that cannot be trusted blindly.
+ *
+ * possible fields that may have been set at mkfs time,
+ * sb_inoalignmt, sb_unit, sb_width.  We know that
+ * the quota inode fields in the secondaries should be zero.
+ * Likewise, the sb_flags and sb_shared_vn should also be
+ * zero and the shared version bit should be cleared for
+ * current mkfs's.
+ *
+ * And everything else in the buffer beyond sb_width should
+ * be zeroed.
+ *
+ * mp   - mount structure; only mp->m_sb.sb_sectsize is read here
+ * sbuf - buffer the superblock came from; not referenced in this routine
+ * sb   - secondary superblock to examine.  The scan below walks from sb
+ *        to sb + sb_sectsize, so this presumably points at the start
+ *        of a full sector -- TODO confirm against the callers
+ * i    - AG number, used only in the warning messages
+ *
+ * Returns 0 if nothing was wrong, otherwise a mask built from
+ * XR_AG_SB (a field was bad and a warning was printed for it) and
+ * XR_AG_SB_SEC (garbage in the unused part of the sector, or a bad
+ * field that is attributed to that garbage).
+ */
+int
+secondary_sb_wack(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+       xfs_agnumber_t i)
+{
+       int do_bzero;
+       int size;
+       int *ip;
+       int rval;
+
+       rval = do_bzero = 0;
+
+       /*
+        * mkfs's that stamped a feature bit besides the ones in the mask
+        * (e.g. were pre-6.5 beta) could leave garbage in the secondary
+        * superblock sectors.  Anything stamping the shared fs bit or better
+        * into the secondaries is ok and should generate clean secondary
+        * superblock sectors.  so only run the bzero check on the
+        * potentially garbaged secondaries.
+        */
+       if (pre_65_beta ||
+           (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK) == 0 ||
+           sb->sb_versionnum < XFS_SB_VERSION_4)  {
+               /*
+                * check for garbage beyond the last field set by the
+                * pre-6.5 mkfs's.  Don't blindly use sizeof(sb).
+                * Use field addresses instead so this code will still
+                * work against older filesystems when the superblock
+                * gets rev'ed again with new fields appended.
+                */
+               /* size = offset of the first byte past sb_width */
+               size = (__psint_t)&sb->sb_width + sizeof(sb->sb_width)
+                       - (__psint_t)sb;
+               /* look at the rest of the sector one int at a time */
+               for (ip = (int *)((__psint_t)sb + size);
+                    ip < (int *)((__psint_t)sb + mp->m_sb.sb_sectsize);
+                    ip++)  {
+                       if (*ip)  {
+                               do_bzero = 1;
+                               break;
+                       }
+               }
+
+               /*
+                * do_bzero stays set in no_modify mode too, so the field
+                * checks below behave the same whether or not the sector
+                * was really zeroed.
+                */
+               if (do_bzero)  {
+                       rval |= XR_AG_SB_SEC;
+                       if (!no_modify)  {
+                               do_warn(
+               "zeroing unused portion of secondary superblock %d sector\n",
+                                       i);
+                               bzero((void *)((__psint_t)sb + size),
+                                       mp->m_sb.sb_sectsize - size);
+                       } else
+                               do_warn(
+               "would zero unused portion of secondary superblock %d sector\n",
+                                       i);
+               }
+       }
+
+       /*
+        * now look for the fields we can manipulate directly.
+        * if we did a bzero and that bzero could have included
+        * the field in question, just silently reset it.  otherwise,
+        * complain.
+        *
+        * Every stanza below follows the same pattern: reset the field
+        * unless no_modify is set, then either warn and flag XR_AG_SB,
+        * or (garbage-sector case) quietly flag XR_AG_SB_SEC.
+        *
+        * for now, just zero the flags field since only
+        * the readonly flag is used
+        */
+       if (sb->sb_flags)  {
+               if (!no_modify)
+                       sb->sb_flags = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn("bad flags field in superblock %d\n", i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       /*
+        * quota inodes and flags in secondary superblocks
+        * are never set by mkfs.  However, they could be set
+        * in a secondary if a fs with quotas was growfs'ed since
+        * growfs copies the new primary into the secondaries.
+        *
+        * The three quota checks only fire when sb_inprogress is 1.
+        * NOTE(review): verify_set_agheader() sets sb_inprogress to 1
+        * after overwriting a bad secondary with the primary, which is
+        * presumably the case these checks are aimed at -- confirm.
+        */
+       if (sb->sb_inprogress == 1 && sb->sb_uquotino)  {
+               if (!no_modify)
+                       sb->sb_uquotino = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn(
+                       "non-null user quota inode field in superblock %d\n",
+                               i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       if (sb->sb_inprogress == 1 && sb->sb_pquotino)  {
+               if (!no_modify)
+                       sb->sb_pquotino = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn(
+                       "non-null project quota inode field in superblock %d\n",
+                               i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       if (sb->sb_inprogress == 1 && sb->sb_qflags)  {
+               if (!no_modify)
+                       sb->sb_qflags = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn("non-null quota flags in superblock %d\n", i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       /*
+        * if the secondaries agree on a stripe unit/width or inode
+        * alignment, those fields ought to be valid since they are
+        * written at mkfs time (and the corresponding sb version bits
+        * are set).
+        *
+        * So a non-zero value is only treated as bad when the matching
+        * version bit is NOT set in this superblock.
+        */
+       if (!XFS_SB_VERSION_HASSHARED(sb) && sb->sb_shared_vn != 0)  {
+               if (!no_modify)
+                       sb->sb_shared_vn = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn("bad shared version number in superblock %d\n",
+                               i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       if (!XFS_SB_VERSION_HASALIGN(sb) && sb->sb_inoalignmt != 0)  {
+               if (!no_modify)
+                       sb->sb_inoalignmt = 0;
+               if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn("bad inode alignment field in superblock %d\n",
+                               i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       /*
+        * NOTE(review): this stanza tests XR_GOOD_SECSB_VNMASK while all
+        * the ones above test XR_PART_SECSB_VNMASK -- confirm that the
+        * difference is intentional.
+        */
+       if (!XFS_SB_VERSION_HASDALIGN(sb) &&
+           (sb->sb_unit != 0 || sb->sb_width != 0))  {
+               if (!no_modify)
+                       sb->sb_unit = sb->sb_width = 0;
+               if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK || !do_bzero)  {
+                       rval |= XR_AG_SB;
+                       do_warn(
+                       "bad stripe unit/width fields in superblock %d\n",
+                               i);
+               } else
+                       rval |= XR_AG_SB_SEC;
+       }
+
+       return(rval);
+}
+
+/*
+ * verify_set_agheader - check one AG's superblock, AGF and AGI
+ * headers and reset what is bad (nothing is written to the headers
+ * by this routine itself when no_modify is set).
+ *
+ * The superblock is validated with verify_sb() and compared against
+ * the mount superblock with compare_sb(); if either check fails it is
+ * replaced by a copy of mp->m_sb with its transient fields cleared.
+ * secondary_sb_wack(), verify_set_agf() and verify_set_agi() are then
+ * run on the headers.
+ *
+ * lower 4 bits of the return value are set depending on what was
+ * found bad (see agheader.h for more details).
+ *
+ * NOTE -- problems are reported through do_warn(), but this routine
+ * does not tell the user that it has altered things.  Rather, it is
+ * up to the caller to do so using the bits encoded into the return
+ * value.
+ */
+
+int
+verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+       xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i)
+{
+       int sb_status;
+       int geo_status;
+       int changed = 0;
+
+       /* is the superblock sane on its own? (AG 0 holds the primary) */
+       sb_status = verify_sb(sb, (i == 0));
+       if (sb_status != XR_OK)
+               do_warn("bad on-disk superblock %d - %s\n",
+                       i, err_string(sb_status));
+
+       /* does its geometry agree with the mount superblock? */
+       geo_status = compare_sb(mp, sb);
+       if (geo_status != XR_OK)
+               do_warn("primary and secondary superblock %d conflict - %s\n",
+                       i, err_string(geo_status));
+
+       if (!(sb_status == XR_OK && geo_status == XR_OK))  {
+               changed |= XR_AG_SB;
+
+               if (!no_modify)  {
+                       /* start over from the mount superblock ... */
+                       *sb = mp->m_sb;
+
+                       /* ... and clear the more transient fields */
+                       sb->sb_inprogress = 1;
+
+                       sb->sb_icount = 0;
+                       sb->sb_ifree = 0;
+                       sb->sb_fdblocks = 0;
+                       sb->sb_frextents = 0;
+
+                       sb->sb_qflags = 0;
+               }
+       }
+
+       changed |= secondary_sb_wack(mp, sbuf, sb, i);
+
+       changed |= verify_set_agf(mp, agf, i);
+       changed |= verify_set_agi(mp, agi, i);
+
+       return changed;
+}
diff --git a/repair/agheader.h b/repair/agheader.h
new file mode 100644 (file)
index 0000000..48326f7
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Geometry summary of a superblock.
+ *
+ * The member order matters: code that compares two of these with
+ * memcmp() up to the address of sb_shared_vn relies on everything
+ * that must match being placed in front of that member.
+ */
+typedef struct fs_geometry  {
+       /*
+        * these types should match the superblock types
+        */
+       __uint32_t      sb_blocksize;   /* blocksize (bytes) */
+       xfs_drfsbno_t   sb_dblocks;     /* # data blocks */
+       xfs_drfsbno_t   sb_rblocks;     /* # realtime blocks */
+       xfs_drtbno_t    sb_rextents;    /* # realtime extents */
+       uuid_t          sb_uuid;        /* fs uuid */
+       xfs_dfsbno_t    sb_logstart;    /* starting log block # */
+       xfs_agblock_t   sb_rextsize;    /* realtime extent size (blocks) */
+       xfs_agblock_t   sb_agblocks;    /* # of blocks per ag */
+       xfs_agnumber_t  sb_agcount;     /* # of ags */
+       xfs_extlen_t    sb_rbmblocks;   /* # of rt bitmap blocks */
+       xfs_extlen_t    sb_logblocks;   /* # of log blocks */
+       __uint16_t      sb_sectsize;    /* volume sector size (bytes) */
+       __uint16_t      sb_inodesize;   /* inode size (bytes) */
+       __uint8_t       sb_imax_pct;    /* max % of fs for inode space */
+
+       /*
+        * these don't have to match the superblock types but are placed
+        * before sb_shared_vn because these values don't have to be
+        * checked manually.  These variables will be set only on
+        * filesystems with dependably good (fully initialized)
+        * secondary superblock sectors, will be stamped in all
+        * superblocks at mkfs time, and are features that cannot
+        * be downgraded unless all superblocks in the filesystem
+        * are rewritten.
+        */
+       int             sb_extflgbit;   /* extent flag feature bit set */
+
+       /*
+        * fields after this point have to be checked manually in compare_sb()
+        *
+        * NOTE(review): the compare_sb() that accompanies this header only
+        * memcmp()s the members above and does not look at the ones
+        * below -- confirm where (or whether) they are checked.
+        */
+       __uint8_t       sb_shared_vn;   /* shared version number */
+       xfs_extlen_t    sb_inoalignmt;  /* inode chunk alignment, fsblocks */
+       __uint32_t      sb_unit;        /* stripe or raid unit */
+       __uint32_t      sb_width;       /* stripe or width unit */
+
+       /*
+        * these don't have to match, they track superblock properties
+        * that could have been upgraded and/or downgraded during
+        * run-time so that the primary superblock has them but the
+        * secondaries do not.
+        * Plus, they have associated data fields whose data fields may
+        * be corrupt in cases where the filesystem was made on a
+        * pre-6.5 campus alpha mkfs and the feature was enabled on
+        * the filesystem later.
+        */
+       int             sb_ialignbit;   /* sb has inode alignment bit set */
+       int             sb_salignbit;   /* sb has stripe alignment bit set */
+       int             sb_sharedbit;   /* sb has shared version bit set */
+
+       int             sb_fully_zeroed; /* has zeroed secondary sb sectors */
+} fs_geometry_t;
+
+/*
+ * Node of a singly linked list of filesystem geometries.
+ */
+typedef struct fs_geo_list  {
+       struct fs_geo_list      *next;  /* next node in the list */
+       int                     refs;   /* presumably # of superblocks sharing this geometry -- TODO confirm */
+       int                     index;  /* NOTE(review): meaning not visible here; verify against users */
+       fs_geometry_t           geo;    /* the geometry itself */
+} fs_geo_list_t;
+
+/*
+ * fields for sb_last_nonzero
+ *
+ * (single-bit values, so they can be OR-ed together)
+ */
+
+#define XR_SB_COUNTERS         0x0001
+#define XR_SB_INOALIGN         0x0002
+#define XR_SB_SALIGN           0x0004
+
+/*
+ * what got modified by verify_set_* routines
+ *
+ * These are single-bit flags; the verify_set_* routines OR them
+ * into their return value.
+ */
+
+#define XR_AG_SB       0x1     /* superblock field was bad */
+#define XR_AG_AGF      0x2     /* AGF header field was bad */
+#define XR_AG_AGI      0x4     /* AGI header field was bad */
+#define XR_AG_SB_SEC   0x8     /* garbage in unused part of sb sector */
+
+
diff --git a/repair/attr_repair.c b/repair/attr_repair.c
new file mode 100644 (file)
index 0000000..d64230b
--- /dev/null
@@ -0,0 +1,1067 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <errno.h>
+#include <acl.h>
+
+#include "globals.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dinode.h"
+#include "bmap.h"
+
+static int acl_valid(struct acl *aclp);
+static int mac_valid(mac_t lp);
+
+
+/*
+ * For attribute repair, there are 3 formats to worry about. First, is 
+ * shortform attributes which reside in the inode. Second is the leaf
+ * form, and lastly the btree. Much of this models after the directory
+ * structure so code resembles the directory repair cases. 
+ * For shortform case, if an attribute looks corrupt, it is removed.
+ * If that leaves the shortform down to 0 attributes, it's okay and 
+ * will appear to just have a null attribute fork. Some checks are done
+ * for validity of the value field based on what the security needs are.
+ * Calls will be made out to mac_valid or acl_valid libc libraries if
+ * the security attributes exist. They will be cleared if invalid. No
+ * other values will be checked. The DMF folks do not have current
+ * requirements, but may in the future.
+ *
+ * For leaf block attributes, it requires more processing. One sticky
+ * point is that the attributes can be local (within the leaf) or 
+ * remote (outside the leaf in other blocks). Considering local ones only:
+ * if you get a bad attribute and want to delete just that one, it's okay
+ * as long as what remains is large enough to still be a leaf block
+ * attribute fork. Otherwise, it may have to be converted to shortform.
+ * How to convert this and when
+ * is an issue. This call is happening in Phase3. Phase5 will capture empty
+ * blocks, but Phase6 allows you to use the simulation library which knows
+ * how to handle attributes in the kernel for converting formats. What we
+ * could do is mark an attribute to be cleared now, but in phase6 somehow
+ * have it cleared for real and then the format changed to shortform if
+ * applicable. Since this requires more work than I anticipate can be
+ * accomplished for the next release, we will instead just say any bad
+ * attribute in the leaf block will make the entire attribute fork be
+ * cleared. The simplest way to do that is to ignore the leaf format, and
+ * call clear_dinode_attr to just make a shortform attribute fork with
+ * zero entries. 
+ *
+ * Another issue with handling repair on leaf attributes is the remote
+ * blocks. To make sure that they look good and are not used multiple times
+ * by the attribute fork, some mechanism to keep track of all them is necessary.
+ * Do this in the future, time permitting. For now, note that there is no
+ * check for remote blocks and their allocations.
+ *
+ * For btree formatted attributes, the model can follow directories. That
+ * would mean go down the tree to the leftmost leaf. From there moving down
+ * the links and processing each. They would call back up the tree, to verify
+ * that the tree structure is okay. Any problems will result in the attribute
+ * fork being emptied and put in shortform format.
+ */
+
+/*
+ * valuecheck - check the value of a security-related attribute.
+ *
+ * This routine just checks what the security needs are for attribute
+ * values.  It is only called when the root flag is set, otherwise
+ * these names could exist in user attribute land without a conflict.
+ *
+ * namevalue - attribute name, immediately followed by the value
+ *             when the value is stored locally
+ * value     - NULL for a local value; if non-NULL, a remote attribute
+ *             value is being passed in and is checked in place
+ * namelen   - length of the name in namevalue
+ * valuelen  - length of the value in bytes
+ *
+ * Local values are copied into a zeroed, properly aligned structure
+ * before being handed to acl_valid()/mac_valid().  A local value whose
+ * length is negative or larger than that structure cannot be a legal
+ * ACL/MAC label and is rejected without being copied, so a corrupt
+ * on-disk length can never overrun the stack copy.
+ *
+ * NOTE(review): the name is matched with strncmp() over the length of
+ * the well-known name only, without looking at namelen, so a longer
+ * name that merely starts with e.g. SGI_ACL_FILE is treated the same
+ * way -- confirm whether namelen should be compared as well.
+ *
+ * Returns 1 if the attribute holds an illegal value and should be
+ * cleared, 0 otherwise (including for names that are not checked).
+ */
+
+int
+valuecheck(char *namevalue, char *value, int namelen, int valuelen)
+{
+       /* for proper alignment issues, get the structs and bcopy the values */
+       mac_label macl;
+       struct acl thisacl;
+       void *valuep;
+       int oversized = 0;
+       int clearit = 0;
+
+       if ((strncmp(namevalue, SGI_ACL_FILE, SGI_ACL_FILE_SIZE) == 0) ||
+                       (strncmp(namevalue, SGI_ACL_DEFAULT,
+                               SGI_ACL_DEFAULT_SIZE) == 0)) {
+               if (value != NULL) {
+                       valuep = value;
+               } else if (valuelen < 0 ||
+                               (size_t) valuelen > sizeof(thisacl)) {
+                       /* would not fit in the local copy: don't touch it */
+                       oversized = 1;
+                       valuep = NULL;
+               } else {
+                       bzero(&thisacl, sizeof(struct acl));
+                       bcopy(namevalue+namelen, &thisacl, valuelen);
+                       valuep = &thisacl;
+               }
+
+               /* acl_valid() returns 0 for a valid acl */
+               if (oversized || acl_valid((struct acl *) valuep) != 0) {
+                       clearit = 1;
+                       do_warn("entry contains illegal value in attribute named SGI_ACL_FILE or SGI_ACL_DEFAULT\n");
+               }
+       } else if (strncmp(namevalue, SGI_MAC_FILE, SGI_MAC_FILE_SIZE) == 0) {
+               if (value != NULL) {
+                       valuep = value;
+               } else if (valuelen < 0 ||
+                               (size_t) valuelen > sizeof(macl)) {
+                       /* would not fit in the local copy: don't touch it */
+                       oversized = 1;
+                       valuep = NULL;
+               } else {
+                       bzero(&macl, sizeof(mac_label));
+                       bcopy(namevalue+namelen, &macl, valuelen);
+                       valuep = &macl;
+               }
+
+               /* mac_valid() returns 1 for a valid label */
+               if (oversized || mac_valid((mac_label *) valuep) != 1) {
+                        /*
+                        *if sysconf says MAC enabled, 
+                        *      temp = mac_from_text("msenhigh/mintlow", NULL)
+                        *      copy it to value, update valuelen, totsize
+                        *      This causes pushing up or down of all following
+                        *      attributes, forcing a attribute format change!!
+                        * else clearit = 1;
+                        */
+                       clearit = 1;
+                       do_warn("entry contains illegal value in attribute named SGI_MAC_LABEL\n");
+               }
+       } else if (strncmp(namevalue, SGI_CAP_FILE, SGI_CAP_FILE_SIZE) == 0) {
+               /* capability sets are only checked for their size */
+               if ( valuelen != sizeof(cap_set_t)) {
+                       clearit = 1;
+                       do_warn("entry contains illegal value in attribute named SGI_CAP_FILE\n");
+               }
+       }
+
+       return(clearit);
+}
+
+
+/*
+ * this routine validates the attributes in shortform format.
+ * a non-zero return repair value means certain attributes are bogus
+ * and were cleared if possible. Warnings do not generate error conditions
+ * if you cannot modify the structures. repair is set to 1, if anything
+ * was fixed.
+ */
+int
+process_shortform_attr(
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             *repair)        
+{
+       xfs_attr_shortform_t    *asf;
+       xfs_attr_sf_entry_t     *currententry, *nextentry, *tempentry;
+       int                     i, junkit;
+       int                     currentsize, remainingspace;
+       
+       *repair = 0;
+
+       asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+
+       /* Assumption: hdr.totsize is less than a leaf block and was checked
+        * by lclinode for valid sizes. Check the count though. 
+       */
+       if (INT_GET(asf->hdr.count, ARCH_CONVERT) == 0) 
+               /* then the total size should just be the header length */
+               if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != sizeof(xfs_attr_sf_hdr_t)) {
+                       /* whoops there's a discrepancy. Clear the hdr */
+                       if (!no_modify) {
+                               do_warn("there are no attributes in the fork for inode %llu \n", ino);
+                               INT_SET(asf->hdr.totsize, ARCH_CONVERT,
+                                               sizeof(xfs_attr_sf_hdr_t));
+                               *repair = 1;
+                               return(1);      
+                       } else {
+                               do_warn("would junk the attribute fork since the count is 0 for inode %llu\n",ino);
+                               return(1);
+                       }
+                }
+               
+       currentsize = sizeof(xfs_attr_sf_hdr_t); 
+       remainingspace = INT_GET(asf->hdr.totsize, ARCH_CONVERT) - currentsize;
+       nextentry = &asf->list[0];
+       for (i = 0; i < INT_GET(asf->hdr.count, ARCH_CONVERT); i++)  {
+               currententry = nextentry;
+               junkit = 0;
+
+               /* don't go off the end if the hdr.count was off */
+               if ((currentsize + (sizeof(xfs_attr_sf_entry_t) - 1)) > 
+                               INT_GET(asf->hdr.totsize, ARCH_CONVERT))
+                       break; /* get out and reset count and totSize */
+
+               /* if the namelen is 0, can't get to the rest of the entries */
+               if (INT_GET(currententry->namelen, ARCH_CONVERT) == 0) {
+                       do_warn("zero length name entry in attribute fork, ");
+                       if (!no_modify) {
+                               do_warn("truncating attributes for inode %llu to %d \n", ino, i);
+                               *repair = 1;
+                               break;  /* and then update hdr fields */
+                       } else {
+                               do_warn("would truncate attributes for inode %llu to %d \n", ino, i);
+                               break;
+                       }
+               } else {
+                       /* It's okay to have a 0 length valuelen, but do a
+                        * rough check to make sure we haven't gone outside of
+                        * totsize.
+                        */
+                       if ((remainingspace < INT_GET(currententry->namelen, ARCH_CONVERT)) ||
+                               ((remainingspace - INT_GET(currententry->namelen, ARCH_CONVERT))
+                                         < INT_GET(currententry->valuelen, ARCH_CONVERT))) {
+                               do_warn("name or value attribute lengths are too large, \n");
+                               if (!no_modify) {
+                                       do_warn(" truncating attributes for inode %llu to %d \n", ino, i);
+                                       *repair = 1; 
+                                       break; /* and then update hdr fields */
+                               } else {
+                                       do_warn(" would truncate attributes for inode %llu to %d \n", ino, i);  
+                                       break;
+                               }       
+                       }
+               }
+       
+               /* namecheck checks for / and null terminated for file names. 
+                * attributes names currently follow the same rules.
+               */
+               if (namecheck((char *)&currententry->nameval[0], 
+                               INT_GET(currententry->namelen, ARCH_CONVERT)))  {
+                       do_warn("entry contains illegal character in shortform attribute name\n");
+                       junkit = 1;
+               }
+
+               if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) {
+                       do_warn("entry has INCOMPLETE flag on in shortform attribute\n");
+                       junkit = 1;
+               }
+
+               /* Only check values for root security attributes */
+               if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) 
+                      junkit = valuecheck((char *)&currententry->nameval[0], NULL, 
+                               INT_GET(currententry->namelen, ARCH_CONVERT), INT_GET(currententry->valuelen, ARCH_CONVERT));
+
+               remainingspace = remainingspace - 
+                               XFS_ATTR_SF_ENTSIZE(currententry);
+
+               if (junkit) {
+                       if (!no_modify) {
+                               /* get rid of only this entry */
+                               do_warn("removing attribute entry %d for inode %llu \n", i, ino);
+                               tempentry = (xfs_attr_sf_entry_t *)
+                                       ((__psint_t) currententry +
+                                        XFS_ATTR_SF_ENTSIZE(currententry));
+                               memmove(currententry,tempentry,remainingspace);
+                               INT_MOD(asf->hdr.count, ARCH_CONVERT, -1);
+                               i--; /* no worries, it will wrap back to 0 */
+                               *repair = 1;
+                               continue; /* go back up now */
+                       } else { 
+                               do_warn("would remove attribute entry %d for inode %llu \n", i, ino);
+                        }
+                }
+
+               /* Let's get ready for the next entry... */
+               nextentry = (xfs_attr_sf_entry_t *)
+                        ((__psint_t) nextentry +
+                        XFS_ATTR_SF_ENTSIZE(currententry));
+               currentsize = currentsize + XFS_ATTR_SF_ENTSIZE(currententry);
+       
+               } /* end the loop */
+
+       
+       if (INT_GET(asf->hdr.count, ARCH_CONVERT) != i)  {
+               if (no_modify)  {
+                       do_warn("would have corrected attribute entry count in inode %llu from %d to %d\n",
+                               ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i);
+               } else  {
+                       do_warn("corrected attribute entry count in inode %llu, was %d, now %d\n",
+                               ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i);
+                       INT_SET(asf->hdr.count, ARCH_CONVERT, i);
+                       *repair = 1;
+               }
+       }
+       
+       /* ASSUMPTION: currentsize <= totsize */
+       if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != currentsize)  {
+               if (no_modify)  {
+                       do_warn("would have corrected attribute totsize in inode %llu from %d to %d\n",
+                               ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize);
+               } else  {
+                       do_warn("corrected attribute entry totsize in inode %llu, was %d, now %d\n",
+                               ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize);
+                       INT_SET(asf->hdr.totsize, ARCH_CONVERT, currentsize);
+                       *repair = 1;
+               }
+       }
+
+       return(*repair);
+}
+
+/*
+ * Assemble a remote attribute value in the caller's buffer.
+ *
+ * Starting at attribute-fork block "blocknum", blocks are looked up through
+ * blkmap and read from disk one at a time; each block's contents are
+ * appended to "value" until valuelen bytes have been copied (the last block
+ * is copied only partially, since valuelen is not a multiple of blocksize).
+ * If get_bmapi gets smarter later and returns an extent or list of extents
+ * this could be done in bigger pieces; for now we don't expect many blocks
+ * per remote value.
+ *
+ * ASSUMPTION: valuelen is a valid number and "value" has room for it.
+ *
+ * Returns 0 on success, 1 if a block is unmapped or cannot be read.
+ */
+static int
+rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
+               xfs_dablk_t blocknum, int valuelen, char* value)
+{
+       xfs_dfsbno_t    fsbno;
+       xfs_buf_t       *buf;
+       int             blk, chunk, copied;
+
+       for (blk = 0, copied = 0; copied < valuelen; blk++) {
+               fsbno = blkmap_get(blkmap, blocknum + blk);
+               if (fsbno == NULLDFSBNO) {
+                       do_warn("remote block for attributes of inode %llu"
+                               " is missing\n", ino);
+                       return (1);
+               }
+
+               buf = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+               if (!buf) {
+                       do_warn("can't read remote block for attributes"
+                               " of inode %llu\n", ino);
+                       return (1);
+               }
+               ASSERT(mp->m_sb.sb_blocksize == XFS_BUF_COUNT(buf));
+
+               /* take a whole block, or whatever is left of the value */
+               chunk = MIN(XFS_BUF_COUNT(buf), valuelen - copied);
+               bcopy(XFS_BUF_PTR(buf), value + copied, chunk);
+               copied += chunk;
+
+               libxfs_putbuf(buf);
+       }
+
+       return (0);
+}
+
+/*
+ * freespace map for directory and attribute leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ *
+ * file-scope scratch map: process_leaf_attr_block re-initializes it with
+ * init_da_freemap for every block it checks and then records each claimed
+ * byte range in it with set_da_freemap.
+ */
+static da_freemap_t attr_freemap[DA_BMAP_SIZE];
+
+/*
+ * Check one attribute leaf block that has already been read in.
+ * The magic number and the forward / backward links are checked by
+ * the callers, not here.
+ *
+ * Each entry is checked for a name index inside the block, a clear
+ * INCOMPLETE flag, a legal non-empty name, a hash value that matches
+ * the name and is not smaller than the previous hash value and, for
+ * ROOT attributes, a valid value. The bytes claimed by the header,
+ * the entry table and every name/value are recorded in attr_freemap
+ * so that overlapping claims are detected. If every entry passes,
+ * hdr.firstused and hdr.usedbytes are compared with the values
+ * computed here and reset (or only reported when no_modify is set).
+ *
+ * last_hashval    - lowest hash value acceptable for the first entry
+ * current_hashval - out: hash value of the last entry that passed the
+ *                   name/hash/value checks; not written at all if no
+ *                   entry got that far (e.g. hdr.count is 0)
+ * repair          - out: 1 if the block header was modified, else 0
+ *
+ * If any problems occur the routine returns with non-zero. In
+ * this case the next step is to clear the attribute fork, by
+ * changing it to shortform and zeroing it out. Forkoff need not
+ * be changed.
+ */
+
+int
+process_leaf_attr_block(
+       xfs_mount_t     *mp,
+       xfs_attr_leafblock_t *leaf,
+       xfs_dablk_t     da_bno,
+       xfs_ino_t       ino,
+       blkmap_t        *blkmap,
+       xfs_dahash_t    last_hashval,
+       xfs_dahash_t    *current_hashval,
+       int             *repair)
+{
+       xfs_attr_leaf_entry_t *entry;
+       xfs_attr_leaf_name_local_t *local;
+       xfs_attr_leaf_name_remote_t *remotep;
+       int  i, start, stop, clearit, usedbs, firstb, thissize;
+
+       clearit = usedbs = 0;
+       *repair = 0;
+       firstb = mp->m_sb.sb_blocksize;
+       stop = sizeof(xfs_attr_leaf_hdr_t);
+
+       /* does the count look sorta valid? */
+       if (INT_GET(leaf->hdr.count, ARCH_CONVERT)
+                               * sizeof(xfs_attr_leaf_entry_t)
+                               + sizeof(xfs_attr_leaf_hdr_t)
+                                                       > XFS_LBSIZE(mp)) {
+               do_warn("bad attribute count %d in attr block %u, inode %llu\n",
+                       (int) INT_GET(leaf->hdr.count, ARCH_CONVERT),
+                                               da_bno, ino);
+               return (1);
+       }
+
+       /* start from an empty map and claim the block header */
+       init_da_freemap(attr_freemap);
+       (void) set_da_freemap(mp, attr_freemap, 0, stop);
+
+       /* go thru each entry checking for problems */
+       for (i = 0, entry = &leaf->entries[0];
+                       i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+                                               i++, entry++) {
+
+               /* check if index is within some boundary. */
+               if (INT_GET(entry->nameidx, ARCH_CONVERT) > XFS_LBSIZE(mp)) {
+                       do_warn("bad attribute nameidx %d in attr block %u, inode %llu\n",
+                               (int)INT_GET(entry->nameidx, ARCH_CONVERT),
+                               da_bno,ino);
+                       clearit = 1;
+                       break;
+               }
+
+               if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) {
+                       /* we are inconsistent state. get rid of us */
+                       do_warn("attribute entry #%d in attr block %u, inode %llu is INCOMPLETE\n",
+                               i, da_bno, ino);
+                       clearit = 1;
+                       break;
+               }
+
+               /* mark the entry used */
+               start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;
+               stop = start + sizeof(xfs_attr_leaf_entry_t);
+               if (set_da_freemap(mp, attr_freemap, start, stop))  {
+                       do_warn("attribute entry %d in attr block %u, inode %llu claims already used space\n",
+                               i,da_bno,ino);
+                       clearit = 1;
+                       break;  /* got an overlap */
+               }
+
+               if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) {
+
+                       local = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
+                       if ((INT_GET(local->namelen, ARCH_CONVERT) == 0) ||
+                                       (namecheck((char *)&local->nameval[0],
+                                               INT_GET(local->namelen, ARCH_CONVERT)))) {
+                               do_warn("attribute entry %d in attr block %u, inode %llu has bad name (namelen = %d)\n",
+                                       i, da_bno, ino, (int) INT_GET(local->namelen, ARCH_CONVERT));
+
+                               clearit = 1;
+                               break;
+                       }
+
+                       /* Check on the hash value. Checking ordering of hash values
+                        * is not necessary, since one wrong one clears the whole
+                        * fork. If the ordering's wrong, it's caught here or
+                        * the kernel code has a bug with transaction logging
+                        * or attributes itself. For paranoia reasons, let's check
+                        * ordering anyway in case both the name value and the
+                        * hashvalue were wrong but matched. Unlikely, however.
+                       */
+                       if (INT_GET(entry->hashval, ARCH_CONVERT) !=
+                               libxfs_da_hashname((char *)&local->nameval[0],
+                                       INT_GET(local->namelen, ARCH_CONVERT)) ||
+                               (INT_GET(entry->hashval, ARCH_CONVERT)
+                                                       < last_hashval)) {
+                               do_warn("bad hashvalue for attribute entry %d in attr block %u, inode %llu\n",
+                                       i, da_bno, ino);
+                               clearit = 1;
+                               break;
+                       }
+
+                       /* Only check values for root security attributes */
+                       if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) {
+                               if (valuecheck((char *)&local->nameval[0], NULL,
+                                           INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT))) {
+                                       do_warn("bad security value for attribute entry %d in attr block %u, inode %llu\n",
+                                               i,da_bno,ino);
+                                       clearit = 1;
+                                       break;
+                               }
+                       }
+                       thissize = XFS_ATTR_LEAF_ENTSIZE_LOCAL(
+                                       INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT));
+
+               } else {
+                       /* do the remote case */
+                       remotep = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
+                       thissize = XFS_ATTR_LEAF_ENTSIZE_REMOTE(
+                                       INT_GET(remotep->namelen, ARCH_CONVERT));
+
+                       if ((INT_GET(remotep->namelen, ARCH_CONVERT) == 0) ||
+                                  (namecheck((char *)&remotep->name[0],
+                                       INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+                                  (INT_GET(entry->hashval, ARCH_CONVERT)
+                                               != libxfs_da_hashname(
+                                       (char *)&remotep->name[0],
+                                        INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+                                  (INT_GET(entry->hashval, ARCH_CONVERT)
+                                               < last_hashval) ||
+                                  (INT_GET(remotep->valueblk, ARCH_CONVERT) == 0)) {
+                               do_warn("inconsistent remote attribute entry %d in attr block %u, ino %llu\n",
+                                       i, da_bno, ino);
+                               clearit = 1;
+                               break;
+                       }
+
+                       if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) {
+                               char*   value;
+
+                               value = malloc(INT_GET(remotep->valuelen, ARCH_CONVERT));
+                               if (value == NULL) {
+                                       /*
+                                        * Without a buffer only the value
+                                        * check is skipped. The entry must
+                                        * still go through the hash, freemap
+                                        * and usedbs/firstb accounting below,
+                                        * otherwise the header would later be
+                                        * "corrected" to wrong values.
+                                        */
+                                       do_warn("cannot malloc enough for remotevalue attribute for inode %llu\n",ino);
+                                       do_warn("SKIPPING this remote attribute\n");
+                               } else {
+                                       if (rmtval_get(mp, ino, blkmap,
+                                                       INT_GET(remotep->valueblk, ARCH_CONVERT),
+                                                       INT_GET(remotep->valuelen, ARCH_CONVERT), value)) {
+                                               do_warn("remote attribute get failed for entry %d, inode %llu\n", i,ino);
+                                               clearit = 1;
+                                               free(value);
+                                               break;
+                                       }
+                                       if (valuecheck((char *)&remotep->name[0], value,
+                                                   INT_GET(remotep->namelen, ARCH_CONVERT), INT_GET(remotep->valuelen, ARCH_CONVERT))){
+                                               do_warn("remote attribute value check  failed for entry %d, inode %llu\n", i, ino);
+                                               clearit = 1;
+                                               free(value);
+                                               break;
+                                       }
+                                       free(value);
+                               }
+                       }
+               }
+
+               /* this entry's hash is the floor for the next entry */
+               *current_hashval = last_hashval
+                                = INT_GET(entry->hashval, ARCH_CONVERT);
+
+               /* claim the bytes holding the name (and local value) */
+               if (set_da_freemap(mp, attr_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+                               INT_GET(entry->nameidx, ARCH_CONVERT) + thissize))  {
+                       do_warn("attribute entry %d in attr block %u, inode %llu claims used space\n",
+                               i, da_bno, ino);
+                       clearit = 1;
+                       break;  /* got an overlap */
+               }
+               usedbs += thissize;
+               if (INT_GET(entry->nameidx, ARCH_CONVERT) < firstb)
+                       firstb = INT_GET(entry->nameidx, ARCH_CONVERT);
+
+       } /* end the loop */
+
+       if (!clearit) {
+               /* verify the header information is correct */
+
+               /* if the holes flag is set, don't reset first_used unless it's
+                * pointing to used bytes.  we're being conservative here
+                * since the block will get compacted anyhow by the kernel.
+                */
+
+               if (  (INT_GET(leaf->hdr.holes, ARCH_CONVERT) == 0
+                   && firstb != INT_GET(leaf->hdr.firstused, ARCH_CONVERT))
+                   || INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > firstb)  {
+                       if (!no_modify)  {
+                               do_warn("- resetting first used heap value from %d to %d in block %u of attribute fork of inode %llu\n",
+                                       (int)INT_GET(leaf->hdr.firstused,
+                                               ARCH_CONVERT), firstb,
+                                               da_bno, ino);
+                               INT_SET(leaf->hdr.firstused,
+                                               ARCH_CONVERT, firstb);
+                               *repair = 1;
+                       } else  {
+                               do_warn("- would reset first used value from %d to %d in block %u of attribute fork of inode %llu\n",
+                                       (int)INT_GET(leaf->hdr.firstused,
+                                               ARCH_CONVERT), firstb,
+                                               da_bno, ino);
+                       }
+               }
+
+               if (usedbs != INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT))  {
+                       if (!no_modify)  {
+                               do_warn("- resetting usedbytes cnt from %d to %d in block %u of attribute fork of inode %llu\n",
+                                       (int)INT_GET(leaf->hdr.usedbytes,
+                                         ARCH_CONVERT), usedbs, da_bno, ino);
+                               INT_SET(leaf->hdr.usedbytes,
+                                               ARCH_CONVERT, usedbs);
+                               *repair = 1;
+                       } else  {
+                               do_warn("- would reset usedbytes cnt from %d to %d in block %u of attribute fork of %llu\n",
+                                       (int)INT_GET(leaf->hdr.usedbytes,
+                                           ARCH_CONVERT), usedbs,da_bno,ino);
+                       }
+               }
+
+               /* there's a lot of work in process_leaf_dir_block to go thru
+               * checking for holes and compacting if appropriate. I don't think
+               * attributes need all that, so let's just leave the holes. If
+               * we discover later that this is a good place to do compaction
+               * we can add it then.
+               */
+       }
+       return (clearit);  /* and repair */
+}
+
+
+/*
+ * Walk the chain of attribute leaf blocks, starting at the block number
+ * stored in da_cursor->level[0].bno and following hdr.info.forw until it
+ * is 0. Each block is mapped through the cursor's blkmap, read, checked
+ * for the attribute leaf magic number, run through
+ * process_leaf_attr_block and checked for a back pointer that names the
+ * previous block. Before moving to the next block the path to it is
+ * checked with verify_da_path; after the last block the final path is
+ * checked with verify_final_da_path. A block that was repaired is
+ * written back unless no_modify is set.
+ *
+ * returns 0 if the attribute fork is ok, 1 if it has to be junked.
+ */
+int
+process_leaf_attr_level(xfs_mount_t    *mp,
+                       da_bt_cursor_t  *da_cursor)
+{
+       int                     repair;
+       xfs_attr_leafblock_t    *leaf;
+       xfs_buf_t               *bp;
+       xfs_ino_t               ino;
+       xfs_dfsbno_t            dev_bno;
+       xfs_dablk_t             da_bno;
+       xfs_dablk_t             prev_bno;
+       xfs_dahash_t            current_hashval = 0;
+       /*
+        * process_leaf_attr_block only stores a hash value once an entry
+        * has been checked, so give this a defined value in case a leaf
+        * block has no entries.
+        */
+       xfs_dahash_t            greatest_hashval = 0;
+
+       da_bno = da_cursor->level[0].bno;
+       ino = da_cursor->ino;
+       prev_bno = 0;
+
+       do {
+               repair = 0;
+               dev_bno = blkmap_get(da_cursor->blkmap, da_bno);
+               /*
+                * 0 is the root block and no block
+                * pointer can point to the root block of the btree
+                */
+               ASSERT(da_bno != 0);
+
+               if (dev_bno == NULLDFSBNO) {
+                       do_warn("can't map block %u for attribute fork "
+                               "for inode %llu\n", da_bno, ino);
+                       goto error_out;
+               }
+
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
+                                       XFS_FSB_TO_BB(mp, 1), 0);
+               if (!bp) {
+                       do_warn("can't read file block %u (fsbno %llu) for"
+                               " attribute fork of inode %llu\n",
+                               da_bno, dev_bno, ino);
+                       goto error_out;
+               }
+
+               leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+               /* check magic number for attribute leaf block */
+               if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+                                               != XFS_ATTR_LEAF_MAGIC) {
+                       /* report the converted value, same as the one compared */
+                       do_warn("bad attribute leaf magic %#x for inode %llu\n",
+                                INT_GET(leaf->hdr.info.magic, ARCH_CONVERT),
+                                ino);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               /*
+                * for each block, process the block, verify its path,
+                * then get next block.  update cursor values along the way
+                */
+               if (process_leaf_attr_block(mp, leaf, da_bno, ino,
+                               da_cursor->blkmap, current_hashval,
+                               &greatest_hashval, &repair))  {
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               /*
+                * index can be set to hdr.count so match the
+                * indexes of the interior blocks -- which at the
+                * end of the block will point to 1 after the final
+                * real entry in the block
+                */
+               da_cursor->level[0].hashval = greatest_hashval;
+               da_cursor->level[0].bp = bp;
+               da_cursor->level[0].bno = da_bno;
+               da_cursor->level[0].index
+                               = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               da_cursor->level[0].dirty = repair;
+
+               if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno)  {
+                       do_warn("bad sibling back pointer for block %u in "
+                               "attribute fork for inode %llu\n", da_bno, ino);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               prev_bno = da_bno;
+               da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+               if (da_bno != 0 && verify_da_path(mp, da_cursor, 0))  {
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               /* the next block must not start below this block's last hash */
+               current_hashval = greatest_hashval;
+
+               if (repair && !no_modify) {
+                       libxfs_writebuf(bp, 0);
+               }
+               else {
+                       libxfs_putbuf(bp);
+               }
+       } while (da_bno != 0);
+
+       if (verify_final_da_path(mp, da_cursor, 0))  {
+               /*
+                * verify the final path up (right-hand-side) if still ok
+                */
+               do_warn("bad hash path in attribute fork for inode %llu\n",
+                       da_cursor->ino);
+               goto error_out;
+       }
+
+       /* releases all buffers holding interior btree blocks */
+       release_da_cursor(mp, da_cursor, 0);
+       return(0);
+
+error_out:
+       /* release all buffers holding interior btree blocks */
+       err_release_da_cursor(mp, da_cursor, 0);
+       return(1);
+}
+
+
+/*
+ * Check an attribute fork that is in node form, i.e. a true btree: the
+ * fork has gotten big enough that it is represented as a non-trivial
+ * (more than just one block) btree.
+ *
+ * A fresh btree cursor is set up for the inode, traverse_int_dablock
+ * walks the interior blocks, and the cursor is then handed to
+ * process_leaf_attr_level for the leaf blocks.
+ *
+ * Note that if we run into any problems, we will trash the attribute fork.
+ *
+ * returns 0 if things are ok, 1 if bad
+ * Note this code has been based off process_node_dir.
+ */
+int
+process_node_attr(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap)
+{
+       da_bt_cursor_t                  cursor;
+       xfs_dablk_t                     bno;
+
+       /*
+        * Start from an all-zero cursor (so active, type and greatest_bno
+        * are 0) and fill in what identifies this attribute fork.
+        */
+       bzero(&cursor, sizeof(cursor));
+       cursor.ino = ino;
+       cursor.dip = dip;
+       cursor.blkmap = blkmap;
+
+       /*
+        * process the interior blocks first; no buffers are held in this
+        * path. a return of 0 from the traversal means it was unsuccessful.
+        */
+       if (traverse_int_dablock(mp, &cursor, &bno, XFS_ATTR_FORK) == 0)
+               return(1);
+
+       /* the interior blocks are fine; the leaf blocks decide the result */
+       return (process_leaf_attr_level(mp, &cursor));
+}
+
+/*
+ * Start processing for a leaf or fuller btree attribute fork, i.e. one
+ * that is too big for the inode. Block 0 of the fork is read and its
+ * magic number decides what it is: a single attribute leaf block, which
+ * is checked here with process_leaf_attr_block, or the root of a node
+ * btree, which is handed to process_node_attr. Non-zero sibling pointers
+ * in block 0 are cleared (or only reported when no_modify is set).
+ * This code is modelled after process_leaf_dir_block.
+ *
+ * returns 0 if things are ok, 1 if bad (attributes needs to be junked)
+ * repair is set, if anything was changed, but attributes can live thru it
+ */
+
+int
+process_longform_attr(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       int             *repair)        /* out - 1 if something was fixed */
+{
+       xfs_attr_leafblock_t    *leaf;
+       xfs_dfsbno_t    bno;
+       xfs_buf_t       *bp;
+       xfs_dahash_t    next_hashval;
+       int             repairlinks = 0;
+
+       *repair = 0;
+
+       bno = blkmap_get(blkmap, 0);
+
+       if ( bno == NULLDFSBNO ) {
+               if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) == 0  &&
+                   dip->di_core.di_aformat == XFS_DINODE_FMT_EXTENTS )
+                       /* it's okay the kernel can handle this state */
+                       return(0);
+               else    {
+                       do_warn("block 0 of inode %llu attribute fork"
+                               " is missing\n", ino);
+                       return(1);
+               }
+       }
+       /*
+        * FIX FOR bug 653709 -- EKN
+        * allocation groups are numbered 0 .. sb_agcount - 1, so an agno
+        * equal to sb_agcount is already outside the filesystem.
+        */
+       if (XFS_FSB_TO_AGNO(mp, bno) >= mp->m_sb.sb_agcount) {
+               do_warn("agno of attribute fork of inode %llu out of "
+                       "regular partition\n", ino);
+               return(1);
+       }
+
+       bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+       if (!bp) {
+               do_warn("can't read block 0 of inode %llu attribute fork\n",
+                       ino);
+               return(1);
+       }
+
+       /* verify leaf block */
+       leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+       /* check sibling pointers in leaf block or root block 0 before
+       * we have to release the btree block
+       */
+       if (   INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0
+           || INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0)  {
+               if (!no_modify)  {
+                       do_warn("clearing forw/back pointers in block 0 "
+                               "for attributes in inode %llu\n", ino);
+                       repairlinks = 1;
+                       INT_SET(leaf->hdr.info.forw, ARCH_CONVERT, 0);
+                       INT_SET(leaf->hdr.info.back, ARCH_CONVERT, 0);
+               } else  {
+                       do_warn("would clear forw/back pointers in block 0 "
+                               "for attributes in inode %llu\n", ino);
+               }
+       }
+
+       /*
+        * use magic number to tell us what type of attribute this is.
+        * it's possible to have a node or leaf attribute in either an
+        * extent format or btree format attribute fork.
+        */
+       switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) {
+       case XFS_ATTR_LEAF_MAGIC:       /* leaf-form attribute */
+               if (process_leaf_attr_block(mp, leaf, 0, ino, blkmap,
+                               0, &next_hashval, repair)) {
+                       /* the block is bad.  lose the attribute fork. */
+                       libxfs_putbuf(bp);
+                       return(1);
+               }
+               /* cleared sibling pointers count as a repair as well */
+               *repair = *repair || repairlinks;
+               break;
+
+       case XFS_DA_NODE_MAGIC:         /* btree-form attribute */
+               /* must do this now, to release block 0 before the traversal */
+               if (repairlinks) {
+                       *repair = 1;
+                       libxfs_writebuf(bp, 0);
+               } else
+                       libxfs_putbuf(bp);
+               return (process_node_attr(mp, ino, dip, blkmap)); /* + repair */
+       default:
+               do_warn("bad attribute leaf magic # %#x for dir ino %llu\n",
+                       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+               libxfs_putbuf(bp);
+               return(1);
+       }
+
+       /* only the leaf case gets here: write the block back if it changed */
+       if (*repair && !no_modify)
+               libxfs_writebuf(bp, 0);
+       else
+               libxfs_putbuf(bp);
+
+       return(0);  /* repair may be set */
+}
+
+
+/*
+ * Entry point for checking an inode's attribute fork. The check is
+ * dispatched on the fork format recorded in the inode core:
+ * local format goes to process_shortform_attr, extents and btree
+ * formats go to process_longform_attr, anything else is illegal.
+ *
+ * returns the result of the format specific check (0 if things are ok),
+ * or 1 for an illegal format. *repair is handed on to the format
+ * specific check.
+ */
+int
+process_attributes(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       int             *repair)  /* returned if we did repair */
+{
+       xfs_dinode_core_t *core;
+       /* REFERENCED */
+       xfs_attr_shortform_t *sf;
+       int rval;
+
+       core = &dip->di_core;
+       sf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+
+       switch (core->di_aformat) {
+       case XFS_DINODE_FMT_LOCAL:
+               /* the shortform data has to fit in the attribute fork */
+               ASSERT(INT_GET(sf->hdr.totsize, ARCH_CONVERT) <= XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+               rval = process_shortform_attr(ino, dip, repair);
+               break;
+
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               /* if this fails, convert the fork to shortform and clear it */
+               /* if repair is set and there is no error, it's taken care of */
+               rval = process_longform_attr(mp, ino, dip, blkmap, repair);
+               break;
+
+       default:
+               do_warn("illegal attribute format %d, ino %llu\n",
+                       core->di_aformat, ino);
+               rval = 1;
+               break;
+       }
+
+       return (rval);  /* and repair */
+}
+
+/*
+ * Sanity-check the structure of an ACL.
+ *
+ * To be accepted an ACL must have no more than ACL_MAX_ENTRIES
+ * entries, exactly one each of ACL_USER_OBJ, ACL_GROUP_OBJ and
+ * ACL_OTHER_OBJ, at most one ACL_MASK, no two ACL_USER/ACL_GROUP
+ * entries sharing both tag and id, and no entry with any other tag.
+ * An ACL_MASK entry is mandatory once any ACL_USER or ACL_GROUP entry
+ * is present.
+ *
+ * Returns 0 if the ACL passes; otherwise sets errno to EINVAL and
+ * returns -1.  A NULL aclp is rejected.
+ */
+static int
+acl_valid (struct acl *aclp)
+{
+       struct acl_entry *cur, *later;
+       int seen_user_obj = 0, seen_group_obj = 0, seen_other_obj = 0;
+       int seen_mask = 0, need_mask = 0;
+       int idx, scan;
+
+       if (aclp == NULL || aclp->acl_cnt > ACL_MAX_ENTRIES)
+               goto bad_acl;
+
+       for (idx = 0; idx < aclp->acl_cnt; idx++) {
+               cur = &aclp->acl_entry[idx];
+
+               switch (cur->ae_tag) {
+               case ACL_USER_OBJ:
+                       if (seen_user_obj)
+                               goto bad_acl;
+                       seen_user_obj = 1;
+                       break;
+               case ACL_GROUP_OBJ:
+                       if (seen_group_obj)
+                               goto bad_acl;
+                       seen_group_obj = 1;
+                       break;
+               case ACL_OTHER_OBJ:
+                       if (seen_other_obj)
+                               goto bad_acl;
+                       seen_other_obj = 1;
+                       break;
+               case ACL_USER:
+               case ACL_GROUP:
+                       /* no later entry may repeat this tag/id pair */
+                       for (scan = idx + 1; scan < aclp->acl_cnt; scan++) {
+                               later = &aclp->acl_entry[scan];
+                               if (later->ae_id == cur->ae_id &&
+                                   later->ae_tag == cur->ae_tag)
+                                       goto bad_acl;
+                       }
+                       need_mask = 1;
+                       break;
+               case ACL_MASK:
+                       if (seen_mask)
+                               goto bad_acl;
+                       seen_mask = 1;
+                       break;
+               default:
+                       goto bad_acl;
+               }
+       }
+
+       /* all three *_OBJ entries present, and a mask if one is needed */
+       if (seen_user_obj && seen_group_obj && seen_other_obj &&
+           (seen_mask || !need_mask))
+               return 0;
+
+bad_acl:
+       errno = EINVAL;
+       return (-1);
+}
+
+/*
+ * Verify that a category or division set is strictly ascending, which
+ * also means no division or category appears more than once.
+ *
+ * Returns 0 if every element is greater than the one before it (lists
+ * of zero or one element pass trivially), -1 otherwise.
+ */
+static int
+__check_setvalue(const unsigned short *list, unsigned short count)
+{
+        unsigned short idx = 1;
+
+        while (idx < count) {
+                /* an equal or smaller successor breaks the ordering */
+                if (list[idx - 1] >= list[idx])
+                        return -1;
+                idx++;
+        }
+        return 0;
+}
+
+
+/*
+ * Check the msen half of a mac label: the msentype value must be a
+ * known one and the level/category data must suit it.
+ * Returns 1 if acceptable, 0 if not.
+ */
+static int
+mac_msen_part_valid(mac_t lp)
+{
+       switch (lp->ml_msen_type) {
+       case MSEN_ADMIN_LABEL:
+       case MSEN_EQUAL_LABEL:
+       case MSEN_HIGH_LABEL:
+       case MSEN_MLD_HIGH_LABEL:
+       case MSEN_LOW_LABEL:
+       case MSEN_MLD_LOW_LABEL:
+               /* these types carry neither a level nor categories */
+               if (lp->ml_level != 0 || lp->ml_catcount > 0 )
+                       return 0;
+               return 1;
+       case MSEN_TCSEC_LABEL:
+       case MSEN_MLD_LABEL:
+               /* categories sit at the front of ml_list */
+               if (lp->ml_catcount > 0 &&
+                   __check_setvalue(lp->ml_list, lp->ml_catcount) == -1)
+                       return 0;
+               return 1;
+       case MSEN_UNKNOWN_LABEL:
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Check the mint half of a mac label: the minttype value must be a
+ * known one and the grade/division data must suit it.
+ * Returns 1 if acceptable, 0 if not.
+ */
+static int
+mac_mint_part_valid(mac_t lp)
+{
+       switch (lp->ml_mint_type) {
+       case MINT_BIBA_LABEL:
+               /* divisions follow the ml_catcount categories in ml_list */
+               if (lp->ml_divcount > 0 &&
+                   __check_setvalue(lp->ml_list + lp->ml_catcount,
+                                    lp->ml_divcount) == -1)
+                       return 0;
+               return 1;
+       case MINT_EQUAL_LABEL:
+       case MINT_HIGH_LABEL:
+       case MINT_LOW_LABEL:
+               /* these types carry neither a grade nor divisions */
+               if (lp->ml_grade != 0 || lp->ml_divcount > 0 )
+                       return 0;
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * mac_valid(lp)
+ * Check the validity of a mac label.
+ *
+ * Returns 1 if the label is valid and 0 if it is not; a NULL label is
+ * not valid.
+ */
+static int
+mac_valid(mac_t lp)
+{
+       if (lp == NULL)
+               return (0);
+
+       /*
+        * the category set and division set together may not exceed
+        * MAC_MAX_SETS entries
+        */
+       if ((lp->ml_catcount + lp->ml_divcount) > MAC_MAX_SETS)
+               return(0);
+
+       /* msen part is examined first, then the mint part */
+       if (!mac_msen_part_valid(lp))
+               return (0);
+       if (!mac_mint_part_valid(lp))
+               return (0);
+
+       return (1);
+}
diff --git a/repair/attr_repair.h b/repair/attr_repair.h
new file mode 100644 (file)
index 0000000..61d3f21
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_ATTRREPAIR_H
+#define _XR_ATTRREPAIR_H
+
+struct blkmap;
+
+/*
+ * Check the attribute fork of inode "ino".
+ * Returns 1 if attributes got cleared and 0 if things are ok;
+ * *repair is returned to report whether a repair was done.
+ */
+int
+process_attributes(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       struct blkmap   *blkmap,
+       int             *repair);
+
+
+#endif /* _XR_ATTRREPAIR_H */
diff --git a/repair/avl.c b/repair/avl.c
new file mode 100644 (file)
index 0000000..4d1a4ac
--- /dev/null
@@ -0,0 +1,1465 @@
+/**************************************************************************
+ *                                                                       *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *                                                                       *
+ **************************************************************************/
+
+#include <libxfs.h>
+
+#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG)
+#define AVL_DEBUG
+#endif
+
+#include "avl.h"
+
+#define CERT   ASSERT
+
+#ifdef AVL_DEBUG
+
+/*
+ * Debug-only (AVL_DEBUG) consistency check of a single node.
+ * Asserts that np's balance flag agrees with which children exist,
+ * that each child is ordered against np by AVL_START and points back
+ * to np as its parent, and that np's avl_nextino link does not start
+ * before np ends.  Nothing is modified.
+ */
+static void
+avl_checknode(
+       register avltree_desc_t *tree,
+       register avlnode_t *np)
+{
+       register avlnode_t *back = np->avl_back;
+       register avlnode_t *forw = np->avl_forw;
+       register avlnode_t *nextino = np->avl_nextino;
+       register int bal = np->avl_balance;
+
+       /* a balanced node has both children or neither */
+       ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw));
+       /* a node leaning one way must have a child on that side */
+       ASSERT(bal != AVL_FORW || forw);
+       ASSERT(bal != AVL_BACK || back);
+
+       if (forw) {
+               /* forw child starts after np and is linked back to np */
+               ASSERT(AVL_START(tree, np) < AVL_START(tree, forw));
+               ASSERT(np->avl_forw->avl_parent == np);
+               ASSERT(back || bal == AVL_FORW);
+       } else {
+               ASSERT(bal != AVL_FORW);
+               ASSERT(bal == AVL_BALANCE || back);
+               ASSERT(bal == AVL_BACK || !back);
+       }
+
+       if (back) {
+               /* back child starts before np and is linked back to np */
+               ASSERT(AVL_START(tree, np) > AVL_START(tree, back));
+               ASSERT(np->avl_back->avl_parent == np);
+               ASSERT(forw || bal == AVL_BACK);
+       } else {
+               ASSERT(bal != AVL_BACK);
+               ASSERT(bal == AVL_BALANCE || forw);
+               ASSERT(bal == AVL_FORW || !forw);
+       }
+
+       /* a node with no successor has no forw child either */
+       if (nextino == NULL)
+               ASSERT(forw == NULL);
+       else
+               ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino));
+}
+
+/*
+ * Debug-only (AVL_DEBUG) walk over the tree rooted at root, running
+ * avl_checknode() on every node it lands on.  The root is asserted to
+ * have no parent.
+ *
+ * The walk keeps no stack: it climbs back up through avl_parent and
+ * uses ``offset'' (the AVL_END of the last node it moved past) together
+ * with ``nlast'' (the node visited just before) to decide which way to
+ * go next.  A node may be landed on, and so checked, more than once.
+ * NOTE(review): this looks like an in-order traversal; confirm before
+ * relying on the visiting order.
+ */
+static void
+avl_checktree(
+       register avltree_desc_t *tree,
+       register avlnode_t *root)
+{
+       register avlnode_t *nlast, *nnext, *np;
+       __psunsigned_t offset = 0;
+       __psunsigned_t end;
+
+       nlast = nnext = root;
+
+       ASSERT(!nnext || nnext->avl_parent == NULL);
+
+       while (nnext) {
+
+               avl_checknode(tree, nnext);
+               end = AVL_END(tree, nnext);
+
+               /*
+                * This node ends at or before offset: go down its forw
+                * side unless we just came from there, else go up.
+                */
+               if (end <= offset) {
+                       if ((np = nnext->avl_forw) && np != nlast) {
+                               nlast = nnext;
+                               nnext = np;
+                       } else {
+                               nlast = nnext;
+                               nnext = nnext->avl_parent;
+                       }
+                       continue;
+               }
+
+               /* descend the back side while it still ends past offset */
+               nlast = nnext;
+               if (np = nnext->avl_back) {
+                       if (AVL_END(tree, np) > offset) {
+                               nnext = np;
+                               continue;
+                       }
+               }
+
+               /* move past this node: forw child if any, else parent */
+               np = nnext;
+               nnext = nnext->avl_forw;
+               if (!nnext)
+                       nnext = np->avl_parent;
+
+               offset = end;
+       }
+}
+#else  /* ! AVL_DEBUG */
+#define avl_checktree(t,x)
+#endif /* AVL_DEBUG */
+
+
+/*
+ * Reset balance for np up through tree.
+ * ``direction'' is the way that np's balance
+ * is headed after the deletion of one of its children --
+ * e.g., deleting a avl_forw child sends avl_balance toward AVL_BACK.
+ * Called only when deleting a node from the tree.
+ *
+ * tree      - tree descriptor; tree->avl_root is rewritten when a
+ *             rotation replaces the root.
+ * np        - node at which rebalancing starts.
+ * direction - AVL_BACK or AVL_FORW (asserted on every pass).
+ *
+ * Each pass of the loop deals with one node and then either returns
+ * or moves on to that node's parent:
+ *   - np was balanced: it now leans in ``direction''; return.
+ *   - np leaned the other way: it becomes balanced; go to the parent.
+ *   - np already leaned in ``direction'': rotate (single or double)
+ *     around np, then return or go to the parent.
+ */
+static void
+retreat(
+       avltree_desc_t *tree,
+       register avlnode_t *np,
+       register int direction)
+{
+       register avlnode_t **rootp = &tree->avl_root;
+       register avlnode_t *parent;
+       register avlnode_t *child;
+       register avlnode_t *tmp;
+       register int    bal;
+
+       do {
+               ASSERT(direction == AVL_BACK || direction == AVL_FORW);
+
+               if (np->avl_balance == AVL_BALANCE) {
+                       np->avl_balance = direction;
+                       return;
+               }
+
+               parent = np->avl_parent;
+
+               /*
+                * If balance is being restored, no local node
+                * reorganization is necessary, but may be at
+                * a higher node.  Reset direction and continue.
+                */
+               if (direction != np->avl_balance) {
+                       np->avl_balance = AVL_BALANCE;
+                       if (parent) {
+                               /*
+                                * direction for the parent is away
+                                * from the side np hangs on
+                                */
+                               if (parent->avl_forw == np)
+                                       direction = AVL_BACK;
+                               else
+                                       direction = AVL_FORW;
+
+                               np = parent;
+                               continue;
+                       }
+                       return;
+               }
+
+               /*
+                * Imbalance.  If a avl_forw node was removed, direction
+                * (and, by reduction, np->avl_balance) is/was AVL_BACK.
+                */
+               if (np->avl_balance == AVL_BACK) {
+
+                       ASSERT(direction == AVL_BACK);
+                       child = np->avl_back;
+                       bal = child->avl_balance;
+
+                       if (bal != AVL_FORW) /* single LL */ {
+                               /*
+                                * np gets pushed down to lesser child's
+                                * avl_forw branch.
+                                *
+                                *  np->    -D              +B
+                                *          / \             / \
+                                * child-> B   deleted     A  -D
+                                *        / \                 /
+                                *       A   C               C
+                                */
+#ifdef AVL_PRINT
+                               if (!(tree->avl_flags & AVLF_DUPLICITY))
+                               cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n",
+                                       np, child);
+#endif
+                               np->avl_back = child->avl_forw;
+                               if (child->avl_forw)
+                                       child->avl_forw->avl_parent = np;
+                               child->avl_forw = np;
+
+                               /* child takes np's slot under parent (or root) */
+                               if (parent) {
+                                       if (parent->avl_forw == np) {
+                                               parent->avl_forw = child;
+                                               direction = AVL_BACK;
+                                       } else {
+                                               ASSERT(parent->avl_back == np);
+                                               parent->avl_back = child;
+                                               direction = AVL_FORW;
+                                       }
+                               } else {
+                                       ASSERT(*rootp == np);
+                                       *rootp = child;
+                               }
+                               np->avl_parent = child;
+                               child->avl_parent = parent;
+
+                               if (bal == AVL_BALANCE) {
+                                       /* child was balanced: stop here */
+                                       np->avl_balance = AVL_BACK;
+                                       child->avl_balance = AVL_FORW;
+                                       return;
+                               } else {
+                                       /*
+                                        * carry on at the parent; a NULL
+                                        * parent ends the loop
+                                        */
+                                       np->avl_balance = AVL_BALANCE;
+                                       child->avl_balance = AVL_BALANCE;
+                                       np = parent;
+                                       avl_checktree(tree, *rootp);
+                                       continue;
+                               }
+                       }
+
+                       /* child->avl_balance == AVL_FORW  double LR rotation
+                        *
+                        * child's avl_forw node gets promoted up, along with
+                        * its avl_forw subtree
+                        *
+                        *  np->     -G                   C
+                        *           / \                 / \
+                        * child-> +B   H              -B   G
+                        *         / \   \             /   / \
+                        *        A  +C   deleted     A   D   H
+                        *             \
+                        *              D
+                        */
+#ifdef AVL_PRINT
+                       if (!(tree->avl_flags & AVLF_DUPLICITY))
+                       cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n",
+                               np, child, child->avl_forw);
+#endif
+                       tmp = child->avl_forw;
+                       bal = tmp->avl_balance;
+
+                       /* tmp's avl_back subtree goes to child, tmp's avl_forw to np */
+                       child->avl_forw = tmp->avl_back;
+                       if (tmp->avl_back)
+                               tmp->avl_back->avl_parent = child;
+
+                       tmp->avl_back = child;
+                       child->avl_parent = tmp;
+
+                       np->avl_back = tmp->avl_forw;
+                       if (tmp->avl_forw)
+                               tmp->avl_forw->avl_parent = np;
+                       tmp->avl_forw = np;
+
+                       /* new balances of child and np depend on tmp's old balance */
+                       if (bal == AVL_FORW)
+                               child->avl_balance = AVL_BACK;
+                       else
+                               child->avl_balance = AVL_BALANCE;
+
+                       if (bal == AVL_BACK)
+                               np->avl_balance = AVL_FORW;
+                       else
+                               np->avl_balance = AVL_BALANCE;
+
+                       goto next;
+               }
+
+               ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW);
+
+               child = np->avl_forw;
+               bal = child->avl_balance;
+
+               if (bal != AVL_BACK) /* single RR */ {
+                       /*
+                        * np gets pushed down to greater child's
+                        * avl_back branch.
+                        *
+                        *  np->    +B               -D
+                        *          / \              / \
+                        *   deleted   D <-child   +B   E
+                        *            / \            \
+                        *           C   E            C
+                        */
+#ifdef AVL_PRINT
+                       if (!(tree->avl_flags & AVLF_DUPLICITY))
+                       cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n",
+                               np, child);
+#endif
+                       np->avl_forw = child->avl_back;
+                       if (child->avl_back)
+                               child->avl_back->avl_parent = np;
+                       child->avl_back = np;
+
+                       /* child takes np's slot under parent (or root) */
+                       if (parent) {
+                               if (parent->avl_forw == np) {
+                                       parent->avl_forw = child;
+                                       direction = AVL_BACK;
+                               } else {
+                                       ASSERT(parent->avl_back == np);
+                                       parent->avl_back = child;
+                                       direction = AVL_FORW;
+                               }
+                       } else {
+                               ASSERT(*rootp == np);
+                               *rootp = child;
+                       }
+                       np->avl_parent = child;
+                       child->avl_parent = parent;
+
+                       if (bal == AVL_BALANCE) {
+                               /* child was balanced: stop here */
+                               np->avl_balance = AVL_FORW;
+                               child->avl_balance = AVL_BACK;
+                               return;
+                       } else {
+                               /* carry on at the parent; a NULL parent ends the loop */
+                               np->avl_balance = AVL_BALANCE;
+                               child->avl_balance = AVL_BALANCE;
+                               np = parent;
+                               avl_checktree(tree, *rootp);
+                               continue;
+                       }
+               }
+
+               /* child->avl_balance == AVL_BACK  double RL rotation */
+#ifdef AVL_PRINT
+               if (!(tree->avl_flags & AVLF_DUPLICITY))
+               cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n",
+                       np, child, child->avl_back);
+#endif
+               tmp = child->avl_back;
+               bal = tmp->avl_balance;
+
+               /* tmp's avl_forw subtree goes to child, tmp's avl_back to np */
+               child->avl_back = tmp->avl_forw;
+               if (tmp->avl_forw)
+                       tmp->avl_forw->avl_parent = child;
+
+               tmp->avl_forw = child;
+               child->avl_parent = tmp;
+
+               np->avl_forw = tmp->avl_back;
+               if (tmp->avl_back)
+                       tmp->avl_back->avl_parent = np;
+               tmp->avl_back = np;
+
+               /* new balances of child and np depend on tmp's old balance */
+               if (bal == AVL_BACK)
+                       child->avl_balance = AVL_FORW;
+               else
+                       child->avl_balance = AVL_BALANCE;
+
+               if (bal == AVL_FORW)
+                       np->avl_balance = AVL_BACK;
+               else
+                       np->avl_balance = AVL_BALANCE;
+next:
+               /*
+                * Shared tail of both double rotations: tmp is now the
+                * balanced head of this subtree and takes np's old slot.
+                */
+               np->avl_parent = tmp;
+               tmp->avl_balance = AVL_BALANCE;
+               tmp->avl_parent = parent;
+
+               if (parent) {
+                       if (parent->avl_forw == np) {
+                               parent->avl_forw = tmp;
+                               direction = AVL_BACK;
+                       } else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = tmp;
+                               direction = AVL_FORW;
+                       }
+               } else {
+                       ASSERT(*rootp == np);
+                       *rootp = tmp;
+                       return;
+               }
+
+               np = parent;
+               avl_checktree(tree, *rootp);
+       } while (np);
+}
+
+/*
+ *     Remove node from tree.
+ *     avl_delete does the local tree manipulations,
+ *     calls retreat() to rebalance tree up to its root.
+ *
+ *     tree - tree descriptor; avl_root and avl_firstino are rewritten
+ *            here when np was the root or the first node.
+ *     np   - node to remove.  The ASSERTs assume it is linked into
+ *            tree.  np's own link fields are not modified.
+ */
+void
+avl_delete(
+       register avltree_desc_t *tree,
+       register avlnode_t *np)
+{
+       register avlnode_t *forw = np->avl_forw;
+       register avlnode_t *back = np->avl_back;
+       register avlnode_t *parent = np->avl_parent;
+       register avlnode_t *nnext;
+
+
+       /*
+        * First take np out of the avl_nextino chain: locate the node
+        * whose avl_nextino points at np (nnext), if there is one.
+        */
+       if (np->avl_back) {
+               /*
+                * a left child exists, then greatest left descendent's nextino
+                * is pointing to np; make it point to np->nextino.
+                */
+               nnext = np->avl_back;
+               while (nnext) {
+                       if (!nnext->avl_forw)
+                               break; /* can't find anything bigger */
+                       nnext = nnext->avl_forw;
+               }
+       } else
+       if (np->avl_parent) {
+               /*
+                * find nearest ancestor with lesser value. That ancestor's
+                * nextino is pointing to np; make it point to np->nextino
+                */
+                nnext = np->avl_parent;
+                while (nnext) {
+                       if (AVL_END(tree, nnext) <= AVL_END(tree, np))
+                               break;
+                       nnext = nnext->avl_parent;
+               }
+       } else
+               nnext = NULL;
+
+       if (nnext) {
+               ASSERT(nnext->avl_nextino == np);
+               nnext->avl_nextino = np->avl_nextino;
+               /*
+                *      Something precedes np; np cannot be firstino.
+                */
+               ASSERT(tree->avl_firstino != np);
+       }
+       else {
+               /*
+                *      Nothing preceding np; after deletion, np's nextino
+                *      is firstino of tree.
+                */
+               ASSERT(tree->avl_firstino == np);
+               tree->avl_firstino = np->avl_nextino;
+       }
+       
+
+       /*
+        * Degenerate cases...
+        * np has at most one child: that child (possibly NULL) is left
+        * in ``forw'' and simply takes np's place under parent.
+        */
+       if (forw == NULL) {
+               forw = back;
+               goto attach;
+       }
+
+       if (back == NULL) {
+attach:
+               if (forw)
+                       forw->avl_parent = parent;
+               if (parent) {
+                       /*
+                        * retreat() is told the direction opposite to
+                        * the side np was removed from
+                        */
+                       if (parent->avl_forw == np) {
+                               parent->avl_forw = forw;
+                               retreat(tree, parent, AVL_BACK);
+                       } else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = forw;
+                               retreat(tree, parent, AVL_FORW);
+                       }
+               } else {
+                       ASSERT(tree->avl_root == np);
+                       tree->avl_root = forw;
+               }
+               avl_checktree(tree, tree->avl_root);
+               return;
+       }
+
+       /*
+        * Harder case: children on both sides.
+        * If back's avl_forw pointer is null, just have back
+        * inherit np's avl_forw tree, remove np from the tree
+        * and adjust balance counters starting at back.
+        *
+        * np->     xI              xH  (before retreat())
+        *          / \             / \
+        * back->  H   J           G   J
+        *        /   / \             / \
+        *       G   ?   ?           ?   ?
+        *      / \
+        *     ?   ?
+        */
+       if ((forw = back->avl_forw) == NULL) {
+               /*
+                * AVL_FORW retreat below will set back's
+                * balance to AVL_BACK.
+                */
+               back->avl_balance = np->avl_balance;
+               back->avl_forw = forw = np->avl_forw;
+               forw->avl_parent = back;
+               back->avl_parent = parent;
+               
+               if (parent) {
+                       if (parent->avl_forw == np)
+                               parent->avl_forw = back;
+                       else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = back;
+                       }
+               } else {
+                       ASSERT(tree->avl_root == np);
+                       tree->avl_root = back;
+               }
+
+               /*
+                * back is taking np's place in the tree, and
+                * has therefore lost a avl_back node (itself).
+                */
+               retreat(tree, back, AVL_FORW);
+               avl_checktree(tree, tree->avl_root);
+               return;
+       }
+
+       /*
+        * Hardest case: children on both sides, and back's
+        * avl_forw pointer isn't null.  Find the immediately
+        * inferior buffer by following back's avl_forw line
+        * to the end, then have it inherit np's avl_forw tree.
+        *
+        * np->     xI                        xH
+        *          / \                       / \
+        *         G   J             back->  G   J   (before retreat())
+        *        / \                       / \
+        *       F   ?...                  F   ?1
+        *      /     \
+        *     ?       H  <-forw
+        *            /
+        *           ?1
+        */
+       /* on exit forw is the last node on that avl_forw line; back is NULL */
+       while (back = forw->avl_forw)
+               forw = back;
+
+       /*
+        * Will be adjusted by retreat() below.
+        */
+       forw->avl_balance = np->avl_balance;
+       
+       /*
+        * forw inherits np's avl_forw...
+        */
+       forw->avl_forw = np->avl_forw;
+       np->avl_forw->avl_parent = forw;
+
+       /*
+        * ... forw's parent gets forw's avl_back...
+        * (``back'' is reused from here on to hold forw's old parent)
+        */
+       back = forw->avl_parent;
+       back->avl_forw = forw->avl_back;
+       if (forw->avl_back)
+               forw->avl_back->avl_parent = back;
+
+       /*
+        * ... forw gets np's avl_back...
+        */
+       forw->avl_back = np->avl_back;
+       np->avl_back->avl_parent = forw;
+
+       /*
+        * ... and forw gets np's parent.
+        */
+       forw->avl_parent = parent;
+
+       if (parent) {
+               if (parent->avl_forw == np)
+                       parent->avl_forw = forw;
+               else
+                       parent->avl_back = forw;
+       } else {
+               ASSERT(tree->avl_root == np);
+               tree->avl_root = forw;
+       }
+
+       /*
+        * What used to be forw's parent is the starting
+        * point for rebalancing.  It has lost a avl_forw node.
+        */
+       retreat(tree, back, AVL_BACK);
+       avl_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *     avl_findanyrange:
+ *
+ *     Given range r [start, end), return the node that contains start
+ *     or, failing that, the first node after start, provided that node
+ *     begins before end (the node may straddle r rather than lie
+ *     inside it).  If checklen is AVL_INCLUDE_ZEROLEN that node is
+ *     returned as is; otherwise zero-length nodes are stepped over via
+ *     avl_nextino first.
+ *
+ *     Returns NULL when no such node exists.
+ */
+avlnode_t *
+avl_findanyrange(
+       register avltree_desc_t *tree,
+       register __psunsigned_t start,
+       register __psunsigned_t end,
+       int     checklen)
+{
+       register avlnode_t *np;
+
+       /*
+        * Descend from the root until np either contains start or is
+        * the node that follows it.
+        */
+       for (np = tree->avl_root; np != NULL; ) {
+               if (start < AVL_START(tree, np)) {
+                       /* no smaller node below: np is the succeeding node */
+                       if (np->avl_back == NULL)
+                               break;
+                       np = np->avl_back;
+               } else if (start >= AVL_END(tree, np)) {
+                       /* no larger node below: the successor is nextino */
+                       if (np->avl_forw == NULL) {
+                               np = np->avl_nextino;
+                               break;
+                       }
+                       np = np->avl_forw;
+               } else {
+                       /* AVL_START(tree, np) <= start < AVL_END(tree, np) */
+                       break;
+               }
+       }
+
+       /* nothing succeeds start, all existing ranges are before start */
+       if (np == NULL)
+               return NULL;
+
+       if (checklen == AVL_INCLUDE_ZEROLEN) {
+               /* np follows start but may lie entirely after end */
+               if (end <= AVL_START(tree, np))
+                       return(NULL);
+               /* np may straddle [start, end) */
+               return(np);
+       }
+
+       /* step over zero-length regions that begin before end */
+       while (np != NULL &&
+              (AVL_END(tree, np) - AVL_START(tree, np) == 0) &&
+              (AVL_START(tree, np) < end))
+               np = np->avl_nextino;
+
+       if (np == NULL || AVL_START(tree, np) >= end)
+               return NULL;
+       return(np);
+}
+
+
+/*
+ * Look up the node whose range [AVL_START, AVL_END) contains value.
+ * Returns that node, or NULL if the search runs off the tree.
+ */
+avlnode_t *
+avl_findrange(
+       register avltree_desc_t *tree,
+       register __psunsigned_t value)
+{
+       register avlnode_t *np;
+
+       for (np = tree->avl_root; np != NULL; ) {
+               if (value < AVL_START(tree, np)) {
+                       /* value lies before this range */
+                       np = np->avl_back;
+               } else if (value >= AVL_END(tree, np)) {
+                       /* value lies at or past the end of this range */
+                       np = np->avl_forw;
+               } else {
+                       ASSERT(AVL_START(tree, np) <= value &&
+                              value < AVL_END(tree, np));
+                       return np;
+               }
+       }
+       return NULL;
+}
+
+
+/*
+ * Exact lookup: return the node whose start equals value, or NULL
+ * when no node starts there.  Range ends are not consulted.
+ */
+avlnode_t *
+avl_find(
+       register avltree_desc_t *tree,
+       register __psunsigned_t value)
+{
+       register avlnode_t *cur = tree->avl_root;
+
+       while (cur != NULL) {
+               register __psunsigned_t key = AVL_START(tree, cur);
+
+               if (key == value)
+                       return cur;
+               /* smaller keys live on the back side, larger on the forw side */
+               cur = (value < key) ? cur->avl_back : cur->avl_forw;
+       }
+       return NULL;
+}
+
+
+/*
+ * Rebalance the AVL tree after a new node has been attached below np.
+ * Called by avl_insert and avl_insert_immediate.
+ *
+ *     rootp   address of the tree's root pointer; it is rewritten when a
+ *             rotation replaces the node that used to be the root.
+ *     np      the node to which the new node was attached.
+ *     growth  side of np that just grew: AVL_BACK or AVL_FORW.
+ */
+static void
+avl_balance(
+       register avlnode_t **rootp,
+       register avlnode_t *np,
+       register int growth)
+{
+       /*
+        * At this point, np points to the node to which
+        * a new node has been attached.  All that remains is to
+        * propagate avl_balance up the tree.
+        */
+       for ( ; ; ) {
+               register avlnode_t *parent = np->avl_parent;
+               register avlnode_t *child;
+
+               CERT(growth == AVL_BACK || growth == AVL_FORW);
+
+               /*
+                * If np was already balanced, set avl_balance
+                * to the new direction.  Continue if there is a
+                * parent after setting growth to reflect np's
+                * relation to its parent.
+                */
+               if (np->avl_balance == AVL_BALANCE) {
+                       np->avl_balance = growth;
+                       if (parent) {
+                               if (parent->avl_forw == np)
+                                       growth = AVL_FORW;
+                               else {
+                                       ASSERT(parent->avl_back == np);
+                                       growth = AVL_BACK;
+                               }
+
+                               np = parent;
+                               continue;
+                       }
+                       break;
+               }
+
+               if (growth != np->avl_balance) {
+                       /*
+                        * Subtree is now balanced -- no net effect
+                        * in the size of the subtree, so leave.
+                        */
+                       np->avl_balance = AVL_BALANCE;
+                       break;
+               }
+
+               /*
+                * np was already leaning towards the side that grew,
+                * so a rotation is required.
+                */
+               if (growth == AVL_BACK) {
+
+                       child = np->avl_back;
+                       CERT(np->avl_balance == AVL_BACK && child);
+
+                       if (child->avl_balance == AVL_BACK) { /* single LL */
+                               /*
+                                * ``A'' just got inserted;
+                                * np points to ``E'', child to ``C'',
+                                * and it is already AVL_BACK --
+                                * child will get promoted to top of subtree.
+
+                               np->         -E                 C
+                                            / \               / \
+                               child->    -C   F            -B   E
+                                          / \               /   / \
+                                        -B   D             A   D   F
+                                        /
+                                       A
+
+                                       Note that child->avl_parent and
+                                       avl_balance get set in common code.
+                                */
+                               np->avl_parent = child;
+                               np->avl_balance = AVL_BALANCE;
+                               np->avl_back = child->avl_forw;
+                               if (child->avl_forw)
+                                       child->avl_forw->avl_parent = np;
+                               child->avl_forw = np;
+                       } else {
+                               /*
+                                * double LR
+                                *
+                                * child's avl_forw node gets promoted to
+                                * the top of the subtree.
+
+                               np->         -E               C
+                                            / \             / \
+                               child->    +B   F          -B   E
+                                          / \             /   / \
+                                         A  +C           A   D   F
+                                              \
+                                               D
+
+                                */
+                               register avlnode_t *tmp = child->avl_forw;
+
+                               CERT(child->avl_balance == AVL_FORW && tmp);
+
+                               child->avl_forw = tmp->avl_back;
+                               if (tmp->avl_back)
+                                       tmp->avl_back->avl_parent = child;
+
+                               tmp->avl_back = child;
+                               child->avl_parent = tmp;
+
+                               np->avl_back = tmp->avl_forw;
+                               if (tmp->avl_forw)
+                                       tmp->avl_forw->avl_parent = np;
+
+                               tmp->avl_forw = np;
+                               np->avl_parent = tmp;
+
+                               if (tmp->avl_balance == AVL_BACK)
+                                       np->avl_balance = AVL_FORW;
+                               else
+                                       np->avl_balance = AVL_BALANCE;
+
+                               if (tmp->avl_balance == AVL_FORW)
+                                       child->avl_balance = AVL_BACK;
+                               else
+                                       child->avl_balance = AVL_BALANCE;
+
+                               /*
+                                * Set child to point to tmp since it is
+                                * now the top of the subtree, and will
+                                * get attached to the subtree parent in
+                                * the common code below.
+                                */
+                               child = tmp;
+                       }
+
+               } else /* growth == AVL_FORW */ {
+
+                       /*
+                        * This code is the mirror image of the
+                        * AVL_BACK case above.
+                        */
+
+                       child = np->avl_forw;
+                       CERT(np->avl_balance == AVL_FORW && child);
+
+                       if (child->avl_balance == AVL_FORW) { /* single RR */
+                               np->avl_parent = child;
+                               np->avl_balance = AVL_BALANCE;
+                               np->avl_forw = child->avl_back;
+                               if (child->avl_back)
+                                       child->avl_back->avl_parent = np;
+                               child->avl_back = np;
+                       } else {
+                               /*
+                                * double RL
+                                *
+                                * child's avl_back node gets promoted to
+                                * the top of the subtree.
+                                */
+                               register avlnode_t *tmp = child->avl_back;
+
+                               ASSERT(child->avl_balance == AVL_BACK && tmp);
+
+                               child->avl_back = tmp->avl_forw;
+                               if (tmp->avl_forw)
+                                       tmp->avl_forw->avl_parent = child;
+
+                               tmp->avl_forw = child;
+                               child->avl_parent = tmp;
+
+                               np->avl_forw = tmp->avl_back;
+                               if (tmp->avl_back)
+                                       tmp->avl_back->avl_parent = np;
+
+                               tmp->avl_back = np;
+                               np->avl_parent = tmp;
+
+                               if (tmp->avl_balance == AVL_FORW)
+                                       np->avl_balance = AVL_BACK;
+                               else
+                                       np->avl_balance = AVL_BALANCE;
+
+                               if (tmp->avl_balance == AVL_BACK)
+                                       child->avl_balance = AVL_FORW;
+                               else
+                                       child->avl_balance = AVL_BALANCE;
+
+                               child = tmp;
+                       }
+               }
+
+               child->avl_parent = parent;     /* common code: hook up new subtree top */
+               child->avl_balance = AVL_BALANCE;
+
+               if (parent) {
+                       if (parent->avl_back == np)
+                               parent->avl_back = child;
+                       else
+                               parent->avl_forw = child;
+               } else {
+                       ASSERT(*rootp == np);
+                       *rootp = child;
+               }
+
+               break;
+       }
+}
+
+/*
+ * Walk down from the root to the place where a node covering
+ * [start, end) would be attached.
+ *
+ * Returns the prospective parent and stores the side on which the new
+ * node would hang (AVL_BACK or AVL_FORW) in *growthp.  Returns NULL,
+ * leaving *growthp untouched, when some existing node lies neither
+ * entirely at-or-after end nor entirely at-or-before start.
+ */
+static
+avlnode_t *
+avl_insert_find_growth(
+               register avltree_desc_t *tree,
+               register __psunsigned_t start,  /* range start at start, */
+               register __psunsigned_t end,    /* exclusive */
+               register int   *growthp)        /* OUT */
+{
+       avlnode_t *root = tree->avl_root;
+       register avlnode_t *cur = root;
+
+       ASSERT(cur); /* caller ensures that there is at least one node in tree */
+
+       while (1) {
+               /* structural sanity checks on the node being visited */
+               CERT(cur->avl_parent || root == cur);
+               CERT(!cur->avl_parent || root != cur);
+               CERT(!(cur->avl_back) || cur->avl_back->avl_parent == cur);
+               CERT(!(cur->avl_forw) || cur->avl_forw->avl_parent == cur);
+               CERT(cur->avl_balance != AVL_FORW || cur->avl_forw);
+               CERT(cur->avl_balance != AVL_BACK || cur->avl_back);
+               CERT(cur->avl_balance != AVL_BALANCE ||
+                    cur->avl_back == NULL || cur->avl_forw);
+               CERT(cur->avl_balance != AVL_BALANCE ||
+                    cur->avl_forw == NULL || cur->avl_back);
+
+               if (AVL_START(tree, cur) >= end) {
+                       /* new range sorts before cur */
+                       if (cur->avl_back == NULL) {
+                               *growthp = AVL_BACK;
+                               return(cur);
+                       }
+                       cur = cur->avl_back;
+               } else if (AVL_END(tree, cur) <= start) {
+                       /* new range sorts after cur */
+                       if (cur->avl_forw == NULL) {
+                               *growthp = AVL_FORW;
+                               return(cur);
+                       }
+                       cur = cur->avl_forw;
+               } else {
+                       /* collides with cur -- let caller decide if it is an error */
+                       return(NULL);
+               }
+       }
+}
+
+
+/*
+ * Hang newnode off parent on the side given by growth and splice it
+ * into the in-order (avl_nextino) list.  newnode's avl_parent is not
+ * set here.
+ */
+static void
+avl_insert_grow(
+       register avltree_desc_t *tree,
+       register avlnode_t *parent,
+       register avlnode_t *newnode,
+       register int growth)
+{
+       register avlnode_t *pred;
+       register __psunsigned_t start = AVL_START(tree, newnode);
+
+       if (growth != AVL_BACK) {
+               /*
+                * Growing to the right: newnode slots into the in-order
+                * list directly after parent.
+                */
+               parent->avl_forw = newnode;
+               newnode->avl_nextino = parent->avl_nextino;
+               parent->avl_nextino = newnode;
+               return;
+       }
+
+       /*
+        * Growing to the left: parent becomes newnode's in-order successor.
+        * newnode's in-order predecessor is the closest ancestor with a
+        * lesser value; before this insertion that ancestor pointed at
+        * parent, afterwards it must point at newnode.
+        */
+       parent->avl_back = newnode;
+       newnode->avl_nextino = parent;
+
+       for (pred = parent; pred != NULL; pred = pred->avl_parent) {
+               if (AVL_END(tree, pred) <= start)
+                       break;
+       }
+
+       /*
+        * pred is NULL when newnode is the least element, and hence
+        * the very first in the list.
+        */
+       if (pred != NULL) {
+               ASSERT(pred->avl_nextino == parent);
+               pred->avl_nextino = newnode;
+       }
+}
+
+
+avlnode_t *
+avl_insert(
+       register avltree_desc_t *tree,
+       register avlnode_t *newnode)
+{
+       register avlnode_t *np;
+       register __psunsigned_t start = AVL_START(tree, newnode);
+       register __psunsigned_t end = AVL_END(tree, newnode);
+       int growth;
+
+       ASSERT(newnode);
+       ASSERT(start <= end);
+
+       /*
+        * Clean all pointers for sanity; some will be reset as necessary.
+        */
+       newnode->avl_nextino = NULL;
+       newnode->avl_parent = NULL;
+       newnode->avl_forw = NULL;
+       newnode->avl_back = NULL;
+       newnode->avl_balance = AVL_BALANCE;
+
+       if ((np = tree->avl_root) == NULL) { /* degenerate case... */
+               tree->avl_root = newnode;
+               tree->avl_firstino = newnode;
+               return newnode;
+       }
+
+       if ((np = avl_insert_find_growth(tree, start, end, &growth)) == NULL) {
+               if (start != end)  { /* non-zero length range */
+#ifdef AVL_USER_MODE
+                       printf(
+                       "avl_insert: Warning! duplicate range [0x%x,0x%x)\n",
+                               start, end);
+#else
+                       /*
+                        * lockmetering tree can't afford printfs here.
+                        */
+                       if (!(tree->avl_flags & AVLF_DUPLICITY))
+                       cmn_err(CE_CONT,
+                       "!avl_insert: Warning! duplicate range [0x%x,0x%x)\n",
+                       start, end);
+#endif
+               }
+               return(NULL);
+       }
+
+       avl_insert_grow(tree, np, newnode, growth);
+       if (growth == AVL_BACK) {
+               /*
+                * Growing to left. if np was firstino, newnode will be firstino
+                */
+                if (tree->avl_firstino == np)
+                       tree->avl_firstino = newnode;
+       }
+#ifdef notneeded
+       else
+       if (growth == AVL_FORW)
+               /*
+                * Cannot possibly be firstino; there is somebody to our left.
+                */
+                ;
+#endif
+
+       newnode->avl_parent = np;
+       CERT(np->avl_forw == newnode || np->avl_back == newnode);
+
+       avl_balance(&tree->avl_root, np, growth);
+
+       avl_checktree(tree, tree->avl_root);
+
+       return newnode;
+}
+
+/*
+ *
+ * avl_insert_immediate(tree, afterp, newnode):
+ *     insert newnode into tree immediately after afterp.
+ *     afterp must not have a right (avl_forw) child; after insertion,
+ *     newnode is the right child of afterp and the tree is rebalanced.
+ *     If afterp is NULL, newnode is installed as the root and as the
+ *     first in-order node of tree.
+ */
+void
+avl_insert_immediate(
+               avltree_desc_t *tree,
+               avlnode_t *afterp,
+               avlnode_t *newnode)
+{
+       /*
+        * Clean all pointers for sanity; some will be reset as necessary.
+        */
+       newnode->avl_nextino = NULL;
+       newnode->avl_parent = NULL;
+       newnode->avl_forw = NULL;
+       newnode->avl_back = NULL;
+       newnode->avl_balance = AVL_BALANCE;
+
+       if (afterp == NULL) {
+               tree->avl_root = newnode;
+               tree->avl_firstino = newnode;
+               return;
+       }
+
+       ASSERT(afterp->avl_forw == NULL);
+       avl_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */
+       /*
+        * avl_insert_grow links the child and in-order pointers only; the
+        * parent link must be set here, exactly as avl_insert does, or
+        * later walks up avl_parent would treat newnode as a root.
+        */
+       newnode->avl_parent = afterp;
+       CERT(afterp->avl_forw == newnode);
+       avl_balance(&tree->avl_root, afterp, AVL_FORW);
+       avl_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *     Returns the first in-order node of the subtree at root, i.e. the
+ *     node reached by following avl_back links; NULL for an empty subtree.
+ */
+avlnode_t *
+avl_firstino(register avlnode_t *root)
+{
+       register avlnode_t *leftmost = root;
+
+       if (leftmost != NULL) {
+               while (leftmost->avl_back != NULL)
+                       leftmost = leftmost->avl_back;
+       }
+       return leftmost;
+}
+
+#ifdef AVL_USER_MODE
+/*
+ * leave this as a user-mode only routine until someone actually
+ * needs it in the kernel
+ */
+
+/*
+ *     Returns the last in-order node of the subtree at root, i.e. the
+ *     node reached by following avl_forw links; NULL for an empty subtree.
+ */
+avlnode_t *
+avl_lastino(register avlnode_t *root)
+{
+       register avlnode_t *rightmost = root;
+
+       if (rightmost != NULL) {
+               while (rightmost->avl_forw != NULL)
+                       rightmost = rightmost->avl_forw;
+       }
+       return rightmost;
+}
+#endif
+
+/*
+ * Initialize tree as an empty AVL tree whose node ranges are read
+ * through the start/end callbacks in ops.
+ */
+void
+avl_init_tree(avltree_desc_t *tree, avlops_t *ops)
+{
+       tree->avl_root = NULL;
+       tree->avl_firstino = NULL;
+       tree->avl_ops = ops;
+       /*
+        * avl_insert tests avl_flags (AVLF_DUPLICITY), so it must not be
+        * left holding whatever happened to be in the descriptor.
+        */
+       tree->avl_flags = 0;
+}
+
+#ifdef AVL_DEBUG
+/*
+ * Print np's range as "[first-last]" (last is the inclusive end),
+ * followed by a newline when nl is non-zero and by a space otherwise.
+ */
+static void
+avl_printnode(avltree_desc_t *tree, avlnode_t *np, int nl)
+{
+       /* the range values are unsigned and wider than int: cast for %lu */
+       printf("[%lu-%lu]%c", (unsigned long)AVL_START(tree, np),
+               (unsigned long)(AVL_END(tree, np) - 1), nl ? '\n' : ' ');
+}
+#endif
+#ifdef STAND_ALONE_DEBUG
+
+/*
+ * Test payload for the stand-alone debug harness: an AVL node header
+ * followed by the range [avl_start, avl_start + avl_size).
+ */
+struct avl_debug_node {
+       avlnode_t       avl_node;       /* must stay first; nodes are cast */
+       xfs_off_t               avl_start;
+       unsigned int    avl_size;
+};
+
+static __psunsigned_t avl_debug_start(avlnode_t *node);
+static __psunsigned_t avl_debug_end(avlnode_t *node);
+
+/* start/end callbacks handed to avl_init_tree by the harness */
+avlops_t avl_debug_ops = {
+       avl_debug_start,
+       avl_debug_end,
+};
+
+/* Returns the start of the range held in the debug node around node. */
+static __psunsigned_t
+avl_debug_start(avlnode_t *node)
+{
+       /* cast the node pointer first, then select the member */
+       return (__psunsigned_t)((struct avl_debug_node *)node)->avl_start;
+}
+
+/* Returns the exclusive end of the range held in the debug node. */
+static __psunsigned_t
+avl_debug_end(avlnode_t *node)
+{
+       struct avl_debug_node *dnp = (struct avl_debug_node *)node;
+
+       return (__psunsigned_t)dnp->avl_start +
+               (__psunsigned_t)dnp->avl_size;
+}
+
+/* fixed pool of debug nodes; entries are handed out once, never freed */
+struct avl_debug_node  freenodes[100];
+struct avl_debug_node  *freehead = &freenodes[0];
+
+/*
+ * Hand out the next unused pool entry as an avlnode_t, with its tree
+ * links cleared.  Returns NULL once the pool is exhausted.
+ */
+static avlnode_t *
+alloc_avl_debug_node(void)
+{
+       avlnode_t *np;
+
+       if (freehead >= &freenodes[sizeof(freenodes) / sizeof(freenodes[0])])
+               return NULL;
+
+       np = &freehead->avl_node;
+       freehead++;
+
+       np->avl_balance = AVL_BALANCE;
+       np->avl_parent = np->avl_forw = np->avl_back = NULL;
+       return np;
+}
+
+/*
+ * Dump the subtree at root sideways: the avl_forw subtree first, then
+ * root itself indented by depth spaces, then the avl_back subtree.
+ * Each level down adds five columns of indentation.
+ */
+static void
+avl_print(avltree_desc_t *tree, avlnode_t *root, int depth)
+{
+       int col;
+
+       if (root == NULL)
+               return;
+
+       /* recursing into a NULL child returns immediately */
+       avl_print(tree, root->avl_forw, depth + 5);
+
+       for (col = depth; col > 0; col--)
+               putchar((int) ' ');
+       avl_printnode(tree, root, 1);
+
+       avl_print(tree, root->avl_back, depth + 5);
+}
+
+/*
+ * Print prompt, read one line from stdin and return it converted with
+ * atoi; returns 0 when no line could be read.
+ */
+static int
+avl_debug_ask_int(const char *prompt)
+{
+       char    answer[256];
+
+       printf("%s", prompt);
+       if (fgets(answer, sizeof(answer), stdin) == NULL)
+               return 0;
+       return atoi(answer);
+}
+
+/*
+ * Interactive exerciser for the AVL routines (STAND_ALONE_DEBUG only).
+ * Seeds the tree with ten ranges of length 10, prints it, then reads
+ * commands of the form "<letter><number>" until end of input:
+ *     f  find a node in [number, end)
+ *     d  as f, then delete the node found
+ *     p  print the tree and the in-order list
+ *     i  insert a range starting at number
+ *     r  list the nodes found by avl_findanyrange for [number, end)
+ */
+int
+main()
+{
+       int             i, j;
+       avlnode_t       *np;
+       struct avl_debug_node *dnp;
+       avltree_desc_t  tree;
+       char            linebuf[256], cmd[256];
+
+       avl_init_tree(&tree, &avl_debug_ops);
+
+       for (i = 100; i > 0; i = i - 10)
+       {
+               np = alloc_avl_debug_node();
+               ASSERT(np);
+               /* range fields live in the enclosing debug node */
+               dnp = (struct avl_debug_node *)np;
+               dnp->avl_start = i;
+               dnp->avl_size = 10;
+               avl_insert(&tree, np);
+       }
+       avl_print(&tree, tree.avl_root, 0);
+
+       for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino)
+               avl_printnode(&tree, np, 0);
+       printf("\n");
+
+       while (1) {
+               printf("Command [fpdir] : ");
+               if (fgets(linebuf, sizeof(linebuf), stdin) == NULL)
+                       break;
+               cmd[0] = '\0';
+               if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2)
+                       continue;
+               switch (cmd[0]) {
+               case 'd':
+               case 'f':
+                       j = avl_debug_ask_int("end of range ? ");
+
+                       if (i == j) j = i+1;
+                       /*
+                        * NOTE(review): the original called avl_findinrange,
+                        * which is not declared in avl.h; avl_findanyrange
+                        * is presumed to be the intended lookup -- confirm.
+                        */
+                       np = avl_findanyrange(&tree, i, j,
+                                             AVL_INCLUDE_ZEROLEN);
+                       if (np) {
+                               avl_printnode(&tree, np, 1);
+                               if (cmd[0] == 'd')
+                                       avl_delete(&tree, np);
+                       } else
+                               printf("Cannot find %d\n", i);
+                       break;
+               case 'p':
+                       avl_print(&tree, tree.avl_root, 0);
+                       for (np = tree.avl_firstino;
+                               np != NULL; np = np->avl_nextino)
+                                       avl_printnode(&tree, np, 0);
+                       printf("\n");
+                       break;
+               case 'i':
+                       np = alloc_avl_debug_node();
+                       ASSERT(np);
+                       dnp = (struct avl_debug_node *)np;
+                       dnp->avl_start = i;
+                       j = avl_debug_ask_int("size of range ? ");
+
+                       dnp->avl_size = j;
+                       avl_insert(&tree, np);
+                       break;
+               case 'r': {
+                       avlnode_t       *b, *t;
+                       int             checklen;
+
+                       j = avl_debug_ask_int("End of range ? ");
+                       checklen = avl_debug_ask_int("checklen 0/1 ? ");
+
+                       b = avl_findanyrange(&tree, i, j, checklen);
+                       if (b) {
+                               printf("Found something\n");
+                               t = b;
+                               while (t)  {
+                                       if (t != b &&
+                                           AVL_START(&tree, t) >= j)
+                                               break;
+                                       avl_printnode(&tree, t, 0);
+                                       t = t->avl_nextino;
+                               }
+                               printf("\n");
+                       }
+                       break;
+                    }
+               default:
+                       break;
+               }
+       }
+       return 0;
+}
+#endif
+
+/*
+ *     Given a tree, find value; returns the range enclosing value if
+ *     there is one.  Otherwise dir selects what is returned:
+ *     AVL_SUCCEED gives the range immediately succeeding value,
+ *     AVL_PRECEED gives the range immediately preceding value.
+ *     Returns NULL when the tree is empty or no such range exists.
+ *
+ *     NOTE(review): for any other dir (with ASSERT compiled out) the
+ *     two miss paths differ: value < start breaks out and returns NULL,
+ *     while value >= end falls through and returns the last node visited.
+ */
+avlnode_t *
+avl_findadjacent(
+       register avltree_desc_t *tree,
+       register __psunsigned_t value,
+       register int            dir)
+{
+        register avlnode_t *np = tree->avl_root;
+
+       while (np) {
+               if (value < AVL_START(tree, np)) {
+                       if (np->avl_back) {
+                               np = np->avl_back;
+                               continue;
+                       }
+                       /* if we were to add node with value, would
+                        * have a growth of AVL_BACK
+                        */
+                       if (dir == AVL_SUCCEED) {
+                               /* if succeeding node is needed, this is it.
+                                */
+                               return(np);
+                       }
+                       if (dir == AVL_PRECEED) {
+                               /*
+                                * find nearest ancestor with lesser value.
+                                */
+                                np = np->avl_parent;
+                                while (np) {
+                                       if (AVL_END(tree, np) <= value)
+                                               break;
+                                       np = np->avl_parent;
+                               }
+                               return(np);
+                       }
+                       ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+                       break;
+               }
+               if (value >= AVL_END(tree, np)) {
+                       if (np->avl_forw) {
+                               np = np->avl_forw;
+                               continue;
+                       }
+                       /* if we were to add node with value, would
+                        * have a growth of AVL_FORW;
+                        */
+                       if (dir == AVL_SUCCEED) {
+                               /* we are looking for a succeeding node;
+                                * this is nextino.
+                                */
+                               return(np->avl_nextino);
+                       }
+                       if (dir == AVL_PRECEED) {
+                               /* looking for a preceding node; this is it. */
+                               return(np);
+                       }       
+                       ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+               }
+               /* AVL_START(tree, np) <= value < AVL_END(tree, np) */
+               return(np);
+       }
+       return NULL;
+}
+
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+/*
+ *     avl_findranges:
+ *
+ *     Given range r [start, end), locate the run of tree nodes that r
+ *     reaches.  On return *startp and *endp point to the first and last
+ *     node of that run, or are both NULL when nothing at or after start
+ *     begins before end.  The nodes from *startp to *endp are in sort
+ *     order and can be walked with avl_nextino.
+ */
+
+void
+avl_findranges(
+       register avltree_desc_t *tree,
+       register __psunsigned_t start,
+       register __psunsigned_t end,
+       avlnode_t               **startp,
+       avlnode_t               **endp)
+{
+       register avlnode_t *first;
+       register avlnode_t *following;
+
+       first = avl_findadjacent(tree, start, AVL_SUCCEED);
+
+       /*
+        * Either nothing succeeds start, or what follows start lies
+        * entirely after end.
+        */
+       if (first == NULL || end <= AVL_START(tree, first)) {
+               *startp = NULL;
+               *endp = NULL;
+               return;
+       }
+
+       *startp = first;
+       following = first->avl_nextino;
+
+       /* see if end is in this region itself */
+       if (end <= AVL_END(tree, first) ||
+           following == NULL ||
+           end <= AVL_START(tree, following)) {
+               *endp = first;
+               return;
+       }
+
+       /*
+        * Have to hunt for the end.  Look for (end - 1): findadjacent
+        * searches for the exact value it is given and knows nothing of
+        * end being one past the last value of interest.
+        */
+       *endp = avl_findadjacent(tree, (end-1), AVL_PRECEED);
+       ASSERT(*endp);
+}
+
+#endif /* AVL_FUTURE_ENHANCEMENTS */
diff --git a/repair/avl.h b/repair/avl.h
new file mode 100644 (file)
index 0000000..a6d53f5
--- /dev/null
@@ -0,0 +1,143 @@
+/**************************************************************************
+ *                                                                       *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *                                                                       *
+ **************************************************************************/
+#ifndef __SYS_AVL_H__
+#define __SYS_AVL_H__
+
+
+typedef struct avlnode {
+       struct  avlnode *avl_forw;      /* pointer to right child  (> parent) */
+       struct  avlnode *avl_back;      /* pointer to left child  (< parent) */
+       struct  avlnode *avl_parent;    /* parent pointer; NULL at the root */
+       struct  avlnode *avl_nextino;   /* next in-order; NULL terminated list*/
+       char             avl_balance;   /* AVL_BACK, AVL_BALANCE or AVL_FORW */
+} avlnode_t;
+
+/*
+ * avl-tree operations
+ *
+ * Callbacks supplied by the tree's user: avl_start returns the start of
+ * a node's range and avl_end returns its end (exclusive).
+ */
+typedef struct avlops {
+       __psunsigned_t  (*avl_start)(avlnode_t *);
+       __psunsigned_t  (*avl_end)(avlnode_t *);
+} avlops_t;
+
+#define        AVL_START(tree, n)      (*(tree)->avl_ops->avl_start)(n) /* start of n's range */
+#define        AVL_END(tree, n)        (*(tree)->avl_ops->avl_end)(n) /* exclusive end of n's range */
+
+/*
+ * tree descriptor:
+ *     root points to the root of the tree.
+ *     firstino points to the first in the ordered list.
+ *     ops holds the start/end callbacks used by AVL_START/AVL_END.
+ *     flags holds AVLF_* bits.
+ */
+typedef struct avltree_desc {
+       avlnode_t       *avl_root;
+       avlnode_t       *avl_firstino;
+       avlops_t        *avl_ops;
+       short            avl_flags;
+} avltree_desc_t;
+
+/* possible values for avl_balance */
+
+#define AVL_BACK       1
+#define AVL_BALANCE    0
+#define AVL_FORW       2
+
+/* possible values for avl_flags */
+
+#define AVLF_DUPLICITY 0x0001          /* no warnings on insert dups */
+
+/*
+ * 'Exported' avl tree routines
+ */
+avlnode_t
+*avl_insert(
+       avltree_desc_t *tree,
+       avlnode_t *newnode);
+
+void
+avl_delete(
+       avltree_desc_t *tree,
+       avlnode_t *np);
+
+void
+avl_insert_immediate(
+       avltree_desc_t *tree,
+       avlnode_t *afterp,
+       avlnode_t *newnode);
+       
+void
+avl_init_tree(
+       avltree_desc_t  *tree,
+       avlops_t *ops);
+
+avlnode_t *
+avl_findrange(
+       avltree_desc_t *tree,
+       __psunsigned_t value);
+
+avlnode_t *
+avl_find(
+       avltree_desc_t *tree,
+       __psunsigned_t value);
+
+avlnode_t *
+avl_findanyrange(
+       avltree_desc_t *tree,
+       __psunsigned_t start,
+       __psunsigned_t end,
+       int     checklen);
+
+
+avlnode_t *
+avl_findadjacent(
+       avltree_desc_t *tree,
+       __psunsigned_t value,
+       int             dir);
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+void
+avl_findranges(
+       register avltree_desc_t *tree,
+       register __psunsigned_t start,
+       register __psunsigned_t end,
+       avlnode_t               **startp,
+       avlnode_t               **endp);
+#endif
+
+#define AVL_PRECEED    0x1
+#define AVL_SUCCEED    0x2
+
+#define AVL_INCLUDE_ZEROLEN    0x0000
+#define AVL_EXCLUDE_ZEROLEN    0x0001
+
+#endif /* __SYS_AVL_H__ */
diff --git a/repair/avl64.c b/repair/avl64.c
new file mode 100644 (file)
index 0000000..091bc81
--- /dev/null
@@ -0,0 +1,1458 @@
+/**************************************************************************
+ *                                                                       *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *                                                                       *
+ **************************************************************************/
+
+/* to allow use by user-level utilities */
+
+#ifdef STAND_ALONE_DEBUG
+#define AVL_USER_MODE
+#endif
+
+#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG)
+#define AVL_DEBUG
+#endif
+
+#include <stdio.h>
+#include <libxfs.h>
+#include "avl64.h"
+
+#define CERT   ASSERT
+
+#ifdef AVL_DEBUG
+
+static void    /* compiled only under AVL_DEBUG */
+avl64_checknode(
+       register avl64tree_desc_t *tree,
+       register avl64node_t *np)
+{
+       register avl64node_t *back = np->avl_back;
+       register avl64node_t *forw = np->avl_forw;
+       register avl64node_t *nextino = np->avl_nextino;
+       register int bal = np->avl_balance;
+       /* ASSERT that np's balance, child/parent links and nextino ordering agree. */
+       ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw)); /* balanced: no children or both */
+       ASSERT(bal != AVL_FORW || forw);        /* leaning forw needs a forw child */
+       ASSERT(bal != AVL_BACK || back);        /* leaning back needs a back child */
+
+       if (forw) {
+               ASSERT(AVL_START(tree, np) < AVL_START(tree, forw));    /* forw child sorts after np */
+               ASSERT(np->avl_forw->avl_parent == np);
+               ASSERT(back || bal == AVL_FORW);
+       } else {
+               ASSERT(bal != AVL_FORW);
+               ASSERT(bal == AVL_BALANCE || back);
+               ASSERT(bal == AVL_BACK || !back);
+       }
+
+       if (back) {
+               ASSERT(AVL_START(tree, np) > AVL_START(tree, back));    /* back child sorts before np */
+               ASSERT(np->avl_back->avl_parent == np);
+               ASSERT(forw || bal == AVL_BACK);
+       } else {
+               ASSERT(bal != AVL_BACK);
+               ASSERT(bal == AVL_BALANCE || forw);
+               ASSERT(bal == AVL_FORW || !forw);
+       }
+
+       if (nextino == NULL)
+               ASSERT(forw == NULL);   /* last node in list order has no forw child */
+       else
+               ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino));  /* np ends before its successor starts */
+}
+
+static void    /* compiled only under AVL_DEBUG; otherwise avl64_checktree() is an empty macro */
+avl64_checktree(
+       register avl64tree_desc_t *tree,
+       register avl64node_t *root)
+{
+       register avl64node_t *nlast, *nnext, *np;
+       __uint64_t offset = 0;  /* end of the last range already walked past */
+       __uint64_t end;
+       /* Walk from root via child/parent links, running avl64_checknode() on each node reached. */
+       nlast = nnext = root;
+
+       ASSERT(!nnext || nnext->avl_parent == NULL);    /* a root has no parent */
+
+       while (nnext) {
+
+               avl64_checknode(tree, nnext);
+               end = AVL_END(tree, nnext);
+
+               if (end <= offset) {    /* range already passed: go forw (unless we just came from there) or up */
+                       if ((np = nnext->avl_forw) && np != nlast) {
+                               nlast = nnext;
+                               nnext = np;
+                       } else {
+                               nlast = nnext;
+                               nnext = nnext->avl_parent;
+                       }
+                       continue;
+               }
+
+               nlast = nnext;
+               if (np = nnext->avl_back) {     /* descend back only if that child ends beyond offset */
+                       if (AVL_END(tree, np) > offset) {
+                               nnext = np;
+                               continue;
+                       }
+               }
+
+               np = nnext;
+               nnext = nnext->avl_forw;
+               if (!nnext)
+                       nnext = np->avl_parent; /* no forw child: climb back up */
+
+               offset = end;   /* this node's range is now behind us */
+       }
+}
+#else  /* ! AVL_DEBUG */
+#define avl64_checktree(t,x)
+#endif /* AVL_DEBUG */
+
+
+/*
+ * Reset balance for np up through tree, rotating subtrees where needed.
+ * ``direction'' is the way that np's balance
+ * is headed after the deletion of one of its children --
+ * e.g., deleting an avl_forw child sends avl_balance toward AVL_BACK.
+ * Called only when deleting a node from the tree.
+ */
+static void
+retreat(
+       avl64tree_desc_t *tree,
+       register avl64node_t *np,
+       register int direction)
+{
+       register avl64node_t **rootp = &tree->avl_root; /* rewritten when a rotation replaces the root */
+       register avl64node_t *parent;
+       register avl64node_t *child;
+       register avl64node_t *tmp;
+       register int    bal;
+
+       do {
+               ASSERT(direction == AVL_BACK || direction == AVL_FORW);
+
+               if (np->avl_balance == AVL_BALANCE) {   /* np was balanced: it now leans, nothing propagates */
+                       np->avl_balance = direction;
+                       return;
+               }
+
+               parent = np->avl_parent;
+
+               /*
+                * If balance is being restored, no local node
+                * reorganization is necessary, but may be at
+                * a higher node.  Reset direction and continue.
+                */
+               if (direction != np->avl_balance) {
+                       np->avl_balance = AVL_BALANCE;
+                       if (parent) {
+                               if (parent->avl_forw == np)
+                                       direction = AVL_BACK;
+                               else
+                                       direction = AVL_FORW;
+
+                               np = parent;
+                               continue;
+                       }
+                       return;
+               }
+
+               /*
+                * Imbalance.  If an avl_forw node was removed, direction
+                * (and, by reduction, np->avl_balance) is/was AVL_BACK.
+                */
+               if (np->avl_balance == AVL_BACK) {
+
+                       ASSERT(direction == AVL_BACK);
+                       child = np->avl_back;
+                       bal = child->avl_balance;
+
+                       if (bal != AVL_FORW) /* single LL */ {
+                               /*
+                                * np gets pushed down to lesser child's
+                                * avl_forw branch.
+                                *
+                                *  np->    -D              +B
+                                *          / \             / \
+                                * child-> B   deleted     A  -D
+                                *        / \                 /
+                                *       A   C               C
+                               cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n",
+                                       np, child);
+                                */
+
+                               np->avl_back = child->avl_forw;
+                               if (child->avl_forw)
+                                       child->avl_forw->avl_parent = np;
+                               child->avl_forw = np;
+
+                               if (parent) {
+                                       if (parent->avl_forw == np) {
+                                               parent->avl_forw = child;
+                                               direction = AVL_BACK;
+                                       } else {
+                                               ASSERT(parent->avl_back == np);
+                                               parent->avl_back = child;
+                                               direction = AVL_FORW;
+                                       }
+                               } else {
+                                       ASSERT(*rootp == np);
+                                       *rootp = child;
+                               }
+                               np->avl_parent = child;
+                               child->avl_parent = parent;
+
+                               if (bal == AVL_BALANCE) {       /* child was balanced: both now lean, stop here */
+                                       np->avl_balance = AVL_BACK;
+                                       child->avl_balance = AVL_FORW;
+                                       return;
+                               } else {
+                                       np->avl_balance = AVL_BALANCE;
+                                       child->avl_balance = AVL_BALANCE;
+                                       np = parent;    /* may be NULL, which ends the loop */
+                                       avl64_checktree(tree, *rootp);
+                                       continue;
+                               }
+                       }
+
+                       /* child->avl_balance == AVL_FORW  double LR rotation
+                        *
+                        * child's avl_forw node gets promoted up, along with
+                        * its avl_forw subtree
+                        *
+                        *  np->     -G                   C
+                        *           / \                 / \
+                        * child-> +B   H              -B   G
+                        *         / \   \             /   / \
+                        *        A  +C   deleted     A   D   H
+                        *             \
+                        *              D
+                       cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n",
+                               np, child, child->avl_forw);
+                        */
+
+                       tmp = child->avl_forw;
+                       bal = tmp->avl_balance;
+
+                       child->avl_forw = tmp->avl_back;
+                       if (tmp->avl_back)
+                               tmp->avl_back->avl_parent = child;
+
+                       tmp->avl_back = child;
+                       child->avl_parent = tmp;
+
+                       np->avl_back = tmp->avl_forw;
+                       if (tmp->avl_forw)
+                               tmp->avl_forw->avl_parent = np;
+                       tmp->avl_forw = np;
+
+                       if (bal == AVL_FORW)
+                               child->avl_balance = AVL_BACK;
+                       else
+                               child->avl_balance = AVL_BALANCE;
+
+                       if (bal == AVL_BACK)
+                               np->avl_balance = AVL_FORW;
+                       else
+                               np->avl_balance = AVL_BALANCE;
+
+                       goto next;
+               }
+
+               ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW);
+
+               child = np->avl_forw;
+               bal = child->avl_balance;
+
+               if (bal != AVL_BACK) /* single RR */ {
+                       /*
+                        * np gets pushed down to greater child's
+                        * avl_back branch.
+                        *
+                        *  np->    +B               -D
+                        *          / \              / \
+                        *   deleted   D <-child   +B   E
+                        *            / \            \
+                        *           C   E            C
+                       cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n",
+                               np, child);
+                        */
+
+                       np->avl_forw = child->avl_back;
+                       if (child->avl_back)
+                               child->avl_back->avl_parent = np;
+                       child->avl_back = np;
+
+                       if (parent) {
+                               if (parent->avl_forw == np) {
+                                       parent->avl_forw = child;
+                                       direction = AVL_BACK;
+                               } else {
+                                       ASSERT(parent->avl_back == np);
+                                       parent->avl_back = child;
+                                       direction = AVL_FORW;
+                               }
+                       } else {
+                               ASSERT(*rootp == np);
+                               *rootp = child;
+                       }
+                       np->avl_parent = child;
+                       child->avl_parent = parent;
+
+                       if (bal == AVL_BALANCE) {       /* child was balanced: both now lean, stop here */
+                               np->avl_balance = AVL_FORW;
+                               child->avl_balance = AVL_BACK;
+                               return;
+                       } else {
+                               np->avl_balance = AVL_BALANCE;
+                               child->avl_balance = AVL_BALANCE;
+                               np = parent;    /* may be NULL, which ends the loop */
+                               avl64_checktree(tree, *rootp);
+                               continue;
+                       }
+               }
+
+               /* child->avl_balance == AVL_BACK  double RL rotation
+               cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n",
+                       np, child, child->avl_back);
+               */
+
+               tmp = child->avl_back;
+               bal = tmp->avl_balance;
+
+               child->avl_back = tmp->avl_forw;
+               if (tmp->avl_forw)
+                       tmp->avl_forw->avl_parent = child;
+
+               tmp->avl_forw = child;
+               child->avl_parent = tmp;
+
+               np->avl_forw = tmp->avl_back;
+               if (tmp->avl_back)
+                       tmp->avl_back->avl_parent = np;
+               tmp->avl_back = np;
+
+               if (bal == AVL_BACK)
+                       child->avl_balance = AVL_FORW;
+               else
+                       child->avl_balance = AVL_BALANCE;
+
+               if (bal == AVL_FORW)
+                       np->avl_balance = AVL_BACK;
+               else
+                       np->avl_balance = AVL_BALANCE;
+next:          /* common tail of both double rotations: tmp takes np's place under parent */
+               np->avl_parent = tmp;
+               tmp->avl_balance = AVL_BALANCE;
+               tmp->avl_parent = parent;
+
+               if (parent) {
+                       if (parent->avl_forw == np) {
+                               parent->avl_forw = tmp;
+                               direction = AVL_BACK;
+                       } else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = tmp;
+                               direction = AVL_FORW;
+                       }
+               } else {
+                       ASSERT(*rootp == np);
+                       *rootp = tmp;
+                       return;
+               }
+
+               np = parent;
+               avl64_checktree(tree, *rootp);
+       } while (np);
+}
+
+/*
+ *     Remove node np from tree.
+ *     avl64_delete unlinks np from the nextino list and from the tree,
+ *     then calls retreat() to rebalance tree up to its root.
+ */
+void
+avl64_delete(
+       register avl64tree_desc_t *tree,
+       register avl64node_t *np)
+{
+       register avl64node_t *forw = np->avl_forw;
+       register avl64node_t *back = np->avl_back;
+       register avl64node_t *parent = np->avl_parent;
+       register avl64node_t *nnext;
+
+
+       if (np->avl_back) {
+               /*
+                * a left child exists, then greatest left descendant's nextino
+                * is pointing to np; make it point to np->nextino.
+                */
+               nnext = np->avl_back;
+               while (nnext) {
+                       if (!nnext->avl_forw)
+                               break; /* can't find anything bigger */
+                       nnext = nnext->avl_forw;
+               }
+       } else
+       if (np->avl_parent) {
+               /*
+                * find nearest ancestor with lesser value. That ancestor's
+                * nextino is pointing to np; make it point to np->nextino
+                */
+                nnext = np->avl_parent;
+                while (nnext) {
+                       if (AVL_END(tree, nnext) <= AVL_END(tree, np))
+                               break;
+                       nnext = nnext->avl_parent;
+               }
+       } else
+               nnext = NULL;
+
+       if (nnext) {
+               ASSERT(nnext->avl_nextino == np);
+               nnext->avl_nextino = np->avl_nextino;
+               /*
+                *      Something precedes np; np cannot be firstino.
+                */
+               ASSERT(tree->avl_firstino != np);
+       }
+       else {
+               /*
+                *      Nothing preceding np; after deletion, np's nextino
+                *      is firstino of tree.
+                */
+               ASSERT(tree->avl_firstino == np);
+               tree->avl_firstino = np->avl_nextino;
+       }
+       
+
+       /*
+        * Degenerate cases...
+        */
+       if (forw == NULL) {     /* no forw child: back (possibly NULL) takes np's place */
+               forw = back;
+               goto attach;
+       }
+
+       if (back == NULL) {     /* no back child: forw takes np's place */
+attach:
+               if (forw)
+                       forw->avl_parent = parent;
+               if (parent) {
+                       if (parent->avl_forw == np) {
+                               parent->avl_forw = forw;
+                               retreat(tree, parent, AVL_BACK);
+                       } else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = forw;
+                               retreat(tree, parent, AVL_FORW);
+                       }
+               } else {
+                       ASSERT(tree->avl_root == np);
+                       tree->avl_root = forw;
+               }
+               avl64_checktree(tree, tree->avl_root);
+               return;
+       }
+
+       /*
+        * Harder case: children on both sides.
+        * If back's avl_forw pointer is null, just have back
+        * inherit np's avl_forw tree, remove np from the tree
+        * and adjust balance counters starting at back.
+        *
+        * np->     xI              xH  (before retreat())
+        *          / \             / \
+        * back->  H   J           G   J
+        *        /   / \             / \
+        *       G   ?   ?           ?   ?
+        *      / \
+        *     ?   ?
+        */
+       if ((forw = back->avl_forw) == NULL) {
+               /*
+                * AVL_FORW retreat below will set back's
+                * balance to AVL_BACK.
+                */
+               back->avl_balance = np->avl_balance;
+               back->avl_forw = forw = np->avl_forw;
+               forw->avl_parent = back;
+               back->avl_parent = parent;
+               
+               if (parent) {
+                       if (parent->avl_forw == np)
+                               parent->avl_forw = back;
+                       else {
+                               ASSERT(parent->avl_back == np);
+                               parent->avl_back = back;
+                       }
+               } else {
+                       ASSERT(tree->avl_root == np);
+                       tree->avl_root = back;
+               }
+
+               /*
+                * back is taking np's place in the tree, and
+                * has therefore lost an avl_back node (itself).
+                */
+               retreat(tree, back, AVL_FORW);
+               avl64_checktree(tree, tree->avl_root);
+               return;
+       }
+
+       /*
+        * Hardest case: children on both sides, and back's
+        * avl_forw pointer isn't null.  Find the immediately
+        * inferior node by following back's avl_forw line
+        * to the end, then have it inherit np's avl_forw tree.
+        *
+        * np->     xI                        xH
+        *          / \                       / \
+        *         G   J             back->  G   J   (before retreat())
+        *        / \                       / \
+        *       F   ?...                  F   ?1
+        *      /     \
+        *     ?       H  <-forw
+        *            /
+        *           ?1
+        */
+       while (back = forw->avl_forw)   /* back is only a scratch cursor here */
+               forw = back;
+
+       /*
+        * Will be adjusted by retreat() below.
+        */
+       forw->avl_balance = np->avl_balance;
+       
+       /*
+        * forw inherits np's avl_forw...
+        */
+       forw->avl_forw = np->avl_forw;
+       np->avl_forw->avl_parent = forw;
+
+       /*
+        * ... forw's parent gets forw's avl_back...
+        */
+       back = forw->avl_parent;        /* from here on, back is forw's old parent */
+       back->avl_forw = forw->avl_back;
+       if (forw->avl_back)
+               forw->avl_back->avl_parent = back;
+
+       /*
+        * ... forw gets np's avl_back...
+        */
+       forw->avl_back = np->avl_back;
+       np->avl_back->avl_parent = forw;
+
+       /*
+        * ... and forw gets np's parent.
+        */
+       forw->avl_parent = parent;
+
+       if (parent) {
+               if (parent->avl_forw == np)
+                       parent->avl_forw = forw;
+               else
+                       parent->avl_back = forw;
+       } else {
+               ASSERT(tree->avl_root == np);
+               tree->avl_root = forw;
+       }
+
+       /*
+        * What used to be forw's parent is the starting
+        * point for rebalancing.  It has lost an avl_forw node.
+        */
+       retreat(tree, back, AVL_BACK);
+       avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *     avl64_findanyrange:
+ *
+ *     Given range r [start, end), find a range which overlaps r (the
+ *     result may straddle r rather than lie inside it); NULL if none.  If
+ *     checklen is not AVL_INCLUDE_ZEROLEN, zero-length ranges are skipped.
+ */
+avl64node_t *
+avl64_findanyrange(
+       register avl64tree_desc_t *tree,
+       register __uint64_t start,
+       register __uint64_t end,
+       int     checklen)
+{
+        register avl64node_t *np = tree->avl_root;
+
+       /* np = avl64_findadjacent(tree, start, AVL_SUCCEED); */
+       while (np) {
+               if (start < AVL_START(tree, np)) {
+                       if (np->avl_back) {
+                               np = np->avl_back;
+                               continue;
+                       }
+                       /* if we were to add node with start, would
+                        * have a growth of AVL_BACK
+                        */
+                       /* if succeeding node is needed, this is it.
+                        */
+                       break;
+               }
+               if (start >= AVL_END(tree, np)) {
+                       if (np->avl_forw) {
+                               np = np->avl_forw;
+                               continue;
+                       }
+                       /* if we were to add node with start, would
+                        * have a growth of AVL_FORW; 
+                        */
+                       /* we are looking for a succeeding node;
+                        * this is nextino.
+                        */
+                       np = np->avl_nextino;
+                       break;
+               }
+               /* AVL_START(tree, np) <= start < AVL_END(tree, np) */
+               break;
+       }
+       if (np) {
+               if (checklen == AVL_INCLUDE_ZEROLEN) {
+                       if (end <= AVL_START(tree, np)) {
+                               /* something follows start, but it
+                                * is entirely after the range (end)
+                                */
+                               return(NULL);
+                       }
+                       /* np may straddle [start, end) */
+                       return(np);
+               }
+               /*
+                * find non-zero length region: follow nextino past zero-length nodes
+                */
+               while (np && (AVL_END(tree, np) - AVL_START(tree, np) == 0)
+                       && (AVL_START(tree, np)  < end))
+                               np = np->avl_nextino;
+
+               if ((np == NULL) || (AVL_START(tree, np) >= end))
+                       return NULL;
+               return(np);
+       }
+       /*
+        * nothing succeeds start, all existing ranges are before start.
+        */
+       return NULL;
+}
+
+
+/*
+ * Returns a pointer to the range containing value (start <= value < end), or NULL.
+ */
+avl64node_t *
+avl64_findrange(
+       register avl64tree_desc_t *tree,
+       register __uint64_t value)
+{
+       register avl64node_t *np = tree->avl_root;
+
+       while (np) {
+               if (value < AVL_START(tree, np)) {
+                       np = np->avl_back;      /* value is below this range */
+                       continue;
+               }
+               if (value >= AVL_END(tree, np)) {
+                       np = np->avl_forw;      /* value is at or past this range's end */
+                       continue;
+               }
+               ASSERT(AVL_START(tree, np) <= value &&
+                      value < AVL_END(tree, np));
+               return np;
+       }
+       return NULL;
+}
+
+
+/*
+ * Returns a pointer to the node whose start is exactly value, or NULL if none.
+ */
+avl64node_t *
+avl64_find(
+       register avl64tree_desc_t *tree,
+       register __uint64_t value)
+{
+       register avl64node_t *np = tree->avl_root;
+       register __uint64_t nvalue;
+
+       while (np) {
+               nvalue = AVL_START(tree, np);
+               if (value < nvalue) {
+                       np = np->avl_back;
+                       continue;
+               }
+               if (value == nvalue) {
+                       return np;
+               }
+               np = np->avl_forw;      /* value > nvalue */
+       }
+       return NULL;
+}
+
+
+/*
+ * Balance buffer AVL tree after attaching a new node to root.
+ * Called only by avl_insert.
+ */
+static void
+avl64_balance(
+       register avl64node_t **rootp,
+       register avl64node_t *np,
+       register int growth)
+{
+       /*
+        * At this point, np points to the node to which
+        * a new node has been attached.  All that remains is to
+        * propagate avl_balance up the tree.
+        */
+       for ( ; ; ) {
+               register avl64node_t *parent = np->avl_parent;
+               register avl64node_t *child;
+
+               CERT(growth == AVL_BACK || growth == AVL_FORW);
+
+               /*
+                * If the buffer was already balanced, set avl_balance
+                * to the new direction.  Continue if there is a
+                * parent after setting growth to reflect np's
+                * relation to its parent.
+                */
+               if (np->avl_balance == AVL_BALANCE) {
+                       np->avl_balance = growth;
+                       if (parent) {
+                               if (parent->avl_forw == np)
+                                       growth = AVL_FORW;
+                               else {
+                                       ASSERT(parent->avl_back == np);
+                                       growth = AVL_BACK;
+                               }
+
+                               np = parent;
+                               continue;
+                       }
+                       break;
+               }
+
+               if (growth != np->avl_balance) {
+                       /*
+                        * Subtree is now balanced -- no net effect
+                        * in the size of the subtree, so leave.
+                        */
+                       np->avl_balance = AVL_BALANCE;
+                       break;
+               }
+
+               if (growth == AVL_BACK) {
+
+                       child = np->avl_back;
+                       CERT(np->avl_balance == AVL_BACK && child);
+
+                       if (child->avl_balance == AVL_BACK) { /* single LL */
+                               /*
+                                * ``A'' just got inserted;
+                                * np points to ``E'', child to ``C'',
+                                * and it is already AVL_BACK --
+                                * child will get promoted to top of subtree.
+
+                               np->         -E                 C
+                                            / \               / \
+                               child->    -C   F            -B   E
+                                          / \               /   / \
+                                        -B   D             A   D   F
+                                        /
+                                       A
+
+                                       Note that child->avl_parent and
+                                       avl_balance get set in common code.
+                                */
+                               np->avl_parent = child;
+                               np->avl_balance = AVL_BALANCE;
+                               np->avl_back = child->avl_forw;
+                               if (child->avl_forw)
+                                       child->avl_forw->avl_parent = np;
+                               child->avl_forw = np;
+                       } else {
+                               /*
+                                * double LR
+                                *
+                                * child's avl_forw node gets promoted to
+                                * the top of the subtree.
+
+                               np->         -E               C
+                                            / \             / \
+                               child->    +B   F          -B   E
+                                          / \             /   / \
+                                         A  +C           A   D   F
+                                              \
+                                               D
+
+                                */
+                               register avl64node_t *tmp = child->avl_forw;
+
+                               CERT(child->avl_balance == AVL_FORW && tmp);
+
+                               child->avl_forw = tmp->avl_back;
+                               if (tmp->avl_back)
+                                       tmp->avl_back->avl_parent = child;
+
+                               tmp->avl_back = child;
+                               child->avl_parent = tmp;
+
+                               np->avl_back = tmp->avl_forw;
+                               if (tmp->avl_forw)
+                                       tmp->avl_forw->avl_parent = np;
+
+                               tmp->avl_forw = np;
+                               np->avl_parent = tmp;
+
+                               if (tmp->avl_balance == AVL_BACK)
+                                       np->avl_balance = AVL_FORW;
+                               else
+                                       np->avl_balance = AVL_BALANCE;
+
+                               if (tmp->avl_balance == AVL_FORW)
+                                       child->avl_balance = AVL_BACK;
+                               else
+                                       child->avl_balance = AVL_BALANCE;
+
+                               /*
+                                * Set child to point to tmp since it is
+                                * now the top of the subtree, and will
+                                * get attached to the subtree parent in
+                                * the common code below.
+                                */
+                               child = tmp;
+                       }
+
+               } else /* growth == AVL_BACK */ {
+
+                       /*
+                        * This code is the mirror image of AVL_FORW above.
+                        */
+
+                       child = np->avl_forw;
+                       CERT(np->avl_balance == AVL_FORW && child);
+
+                       if (child->avl_balance == AVL_FORW) { /* single RR */
+                               np->avl_parent = child;
+                               np->avl_balance = AVL_BALANCE;
+                               np->avl_forw = child->avl_back;
+                               if (child->avl_back)
+                                       child->avl_back->avl_parent = np;
+                               child->avl_back = np;
+                       } else {
+                               /*
+                                * double RL
+                                */
+                               register avl64node_t *tmp = child->avl_back;
+
+                               ASSERT(child->avl_balance == AVL_BACK && tmp);
+
+                               child->avl_back = tmp->avl_forw;
+                               if (tmp->avl_forw)
+                                       tmp->avl_forw->avl_parent = child;
+
+                               tmp->avl_forw = child;
+                               child->avl_parent = tmp;
+
+                               np->avl_forw = tmp->avl_back;
+                               if (tmp->avl_back)
+                                       tmp->avl_back->avl_parent = np;
+
+                               tmp->avl_back = np;
+                               np->avl_parent = tmp;
+
+                               if (tmp->avl_balance == AVL_FORW)
+                                       np->avl_balance = AVL_BACK;
+                               else
+                                       np->avl_balance = AVL_BALANCE;
+
+                               if (tmp->avl_balance == AVL_BACK)
+                                       child->avl_balance = AVL_FORW;
+                               else
+                                       child->avl_balance = AVL_BALANCE;
+
+                               child = tmp;
+                       }
+               }
+
+               child->avl_parent = parent;
+               child->avl_balance = AVL_BALANCE;
+
+               if (parent) {
+                       if (parent->avl_back == np)
+                               parent->avl_back = child;
+                       else
+                               parent->avl_forw = child;
+               } else {
+                       ASSERT(*rootp == np);
+                       *rootp = child;
+               }
+
+               break;
+       }
+}
+
+/*
+ * Locate the attachment point for a new range [start, end).
+ *
+ * Descends from the root: a node starting at or after 'end' sends the
+ * search to its back (left) child, a node ending at or before 'start'
+ * sends it to its forw (right) child.  The walk stops at the first node
+ * that lacks a child in the required direction.
+ *
+ * Returns that node, with *growthp set to AVL_BACK or AVL_FORW to say
+ * on which side the new node has to be hung.  Returns NULL, leaving
+ * *growthp untouched, when a node is met that lies neither entirely
+ * before nor entirely after [start, end).
+ *
+ * The tree must not be empty.
+ */
+static
+avl64node_t *
+avl64_insert_find_growth(
+               register avl64tree_desc_t *tree,
+               register __uint64_t start,      /* range start at start, */
+               register __uint64_t end,        /* exclusive */
+               register int   *growthp)        /* OUT */ 
+{
+       avl64node_t *root = tree->avl_root;
+       register avl64node_t *np;
+
+       np = root;
+       ASSERT(np); /* caller ensures that there is at least one node in tree */
+
+       for ( ; ; ) {
+               /*
+                * Sanity checks on the node being visited: only the root
+                * may lack a parent, children must point back at np, and
+                * the balance factor must agree with the children present.
+                */
+               CERT(np->avl_parent || root == np);
+               CERT(!np->avl_parent || root != np);
+               CERT(!(np->avl_back) || np->avl_back->avl_parent == np);
+               CERT(!(np->avl_forw) || np->avl_forw->avl_parent == np);
+               CERT(np->avl_balance != AVL_FORW || np->avl_forw);
+               CERT(np->avl_balance != AVL_BACK || np->avl_back);
+               CERT(np->avl_balance != AVL_BALANCE ||
+                    np->avl_back == NULL || np->avl_forw);
+               CERT(np->avl_balance != AVL_BALANCE ||
+                    np->avl_forw == NULL || np->avl_back);
+
+               /* new range lies entirely before np: go left */
+               if (AVL_START(tree, np) >= end) {
+                       if (np->avl_back) {
+                               np = np->avl_back;
+                               continue;
+                       }
+                       *growthp = AVL_BACK;
+                       break;
+               }
+
+               /* new range lies entirely after np: go right */
+               if (AVL_END(tree, np) <= start) {
+                       if (np->avl_forw) {
+                               np = np->avl_forw;
+                               continue;
+                       }
+                       *growthp = AVL_FORW;
+                       break;
+               }
+               /* ranges collide -- let caller decide if it is an error */
+               return(NULL);
+       }
+       return(np);
+}
+
+
+/*
+ * Hang newnode off parent on the side named by growth and splice it
+ * into the in-order (avl_nextino) list.  newnode's own parent pointer
+ * is not written here and no rebalancing is done.
+ */
+static void
+avl64_insert_grow(
+       register avl64tree_desc_t *tree,
+       register avl64node_t *parent,
+       register avl64node_t *newnode,
+       register int growth)
+{
+       register __uint64_t new_start = AVL_START(tree, newnode);
+       register avl64node_t *pred;
+
+       if (growth != AVL_BACK) {
+               /*
+                * Growing to the right: parent is the in-order
+                * predecessor, so newnode slots in directly behind it.
+                */
+               parent->avl_forw = newnode;
+               newnode->avl_nextino = parent->avl_nextino;
+               parent->avl_nextino = newnode;
+               return;
+       }
+
+       /*
+        * Growing to the left: parent becomes the in-order successor.
+        */
+       parent->avl_back = newnode;
+       newnode->avl_nextino = parent;
+
+       /*
+        * The in-order predecessor is the closest ancestor ending at or
+        * before newnode's start.  Up to now it has pointed at parent.
+        */
+       for (pred = parent; pred != NULL; pred = pred->avl_parent) {
+               if (AVL_END(tree, pred) <= new_start)
+                       break;
+       }
+
+       /*
+        * No such ancestor: newnode is the least element and hence the
+        * very first in the list, so there is nothing to relink.
+        */
+       if (pred == NULL)
+               return;
+
+       ASSERT(pred->avl_nextino == parent);
+       pred->avl_nextino = newnode;
+}
+
+
+avl64node_t *
+avl64_insert(
+       register avl64tree_desc_t *tree,
+       register avl64node_t *newnode)
+{
+       register avl64node_t *np;
+       register __uint64_t start = AVL_START(tree, newnode);
+       register __uint64_t end = AVL_END(tree, newnode);
+       int growth;
+
+       ASSERT(newnode);
+       /*
+        * Clean all pointers for sanity; some will be reset as necessary.
+        */
+       newnode->avl_nextino = NULL;
+       newnode->avl_parent = NULL;
+       newnode->avl_forw = NULL;
+       newnode->avl_back = NULL;
+       newnode->avl_balance = AVL_BALANCE;
+
+       if ((np = tree->avl_root) == NULL) { /* degenerate case... */
+               tree->avl_root = newnode;
+               tree->avl_firstino = newnode;
+               return newnode;
+       }
+
+       if ((np = avl64_insert_find_growth(tree, start, end, &growth))
+                       == NULL) {
+               if (start != end)  { /* non-zero length range */
+#ifdef AVL_USER_MODE
+               printf("avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+                               start, end);
+#else
+                       cmn_err(CE_CONT,
+               "!avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+                               start, end);
+#endif
+               }
+               return(NULL);
+       }
+
+       avl64_insert_grow(tree, np, newnode, growth);
+       if (growth == AVL_BACK) {
+               /*
+                * Growing to left. if np was firstino, newnode will be firstino
+                */
+                if (tree->avl_firstino == np)
+                       tree->avl_firstino = newnode;
+       }
+#ifdef notneeded
+       else
+       if (growth == AVL_FORW)
+               /*
+                * Cannot possibly be firstino; there is somebody to our left.
+                */
+                ;
+#endif
+
+       newnode->avl_parent = np;
+       CERT(np->avl_forw == newnode || np->avl_back == newnode);
+
+       avl64_balance(&tree->avl_root, np, growth);
+
+       avl64_checktree(tree, tree->avl_root);
+
+       return newnode;
+}
+
+/*
+ *
+ * avl64_insert_immediate(tree, afterp, newnode):
+ *     insert newnode into tree immediately after afterp.
+ *     newnode is attached as the right child of afterp, which must not
+ *     have a right child yet, and the tree is then rebalanced.
+ *     If afterp is NULL, newnode becomes the root and first node of
+ *     the tree.
+ */
+void
+avl64_insert_immediate(
+               avl64tree_desc_t *tree,
+               avl64node_t *afterp,
+               avl64node_t *newnode)
+{
+       /*
+        * Clean all pointers for sanity; some will be reset as necessary.
+        */
+       newnode->avl_nextino = NULL;
+       newnode->avl_parent = NULL;
+       newnode->avl_forw = NULL;
+       newnode->avl_back = NULL;
+       newnode->avl_balance = AVL_BALANCE;
+
+       if (afterp == NULL) {
+               tree->avl_root = newnode;
+               tree->avl_firstino = newnode;
+               return;
+       }
+
+       ASSERT(afterp->avl_forw == NULL);
+       avl64_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */
+       /*
+        * avl64_insert_grow() links newnode below afterp but leaves
+        * newnode's parent pointer alone; set it here, just as
+        * avl64_insert() does, before rebalancing.
+        */
+       newnode->avl_parent = afterp;
+       CERT(afterp->avl_forw == newnode);
+       avl64_balance(&tree->avl_root, afterp, AVL_FORW);
+       avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *     Returns first in order node: the leftmost node of the subtree
+ *     rooted at root, or NULL if root is NULL.
+ */
+avl64node_t *
+avl64_firstino(register avl64node_t *root)
+{
+       register avl64node_t *leftmost = root;
+
+       if (leftmost != NULL) {
+               while (leftmost->avl_back != NULL)
+                       leftmost = leftmost->avl_back;
+       }
+       return leftmost;
+}
+
+#ifdef AVL_USER_MODE
+/*
+ * leave this as a user-mode only routine until someone actually
+ * needs it in the kernel
+ */
+
+/*
+ *     Returns last in order node: the rightmost node of the subtree
+ *     rooted at root, or NULL if root is NULL.
+ */
+avl64node_t *
+avl64_lastino(register avl64node_t *root)
+{
+       register avl64node_t *rightmost = root;
+
+       if (rightmost != NULL) {
+               while (rightmost->avl_forw != NULL)
+                       rightmost = rightmost->avl_forw;
+       }
+       return rightmost;
+}
+#endif
+
+/*
+ * Set up an empty tree (no root, no first node) whose node ranges are
+ * read through the ops callbacks.
+ */
+void
+avl64_init_tree(avl64tree_desc_t *tree, avl64ops_t *ops)
+{
+       tree->avl_ops = ops;
+       tree->avl_firstino = NULL;
+       tree->avl_root = NULL;
+}
+
+#ifdef AVL_DEBUG
+/*
+ * Print the range held by np as "[start-last]", where last is the
+ * node's end minus one, followed by a newline if nl is non-zero and by
+ * a blank otherwise.
+ */
+static void
+avl64_printnode(avl64tree_desc_t *tree, avl64node_t *np, int nl)
+{
+       /*
+        * The range bounds are 64 bit unsigned values; %d would read
+        * them as int, so print them as unsigned long long instead.
+        */
+       printf("[%llu-%llu]%c",
+               (unsigned long long)AVL_START(tree, np),
+               (unsigned long long)(AVL_END(tree, np) - 1),
+               nl ? '\n' : ' ');
+}
+#endif
+#ifdef STAND_ALONE_DEBUG
+
+/*
+ * Node type of the stand-alone test: the AVL linkage plus the range
+ * (start and size) the node stands for.  avl_node is the first member,
+ * which is what lets the range callbacks cast an avl64node_t pointer
+ * back to the enclosing avl_debug_node.
+ */
+struct avl_debug_node {
+       avl64node_t     avl_node;
+       xfs_off_t               avl_start;
+       unsigned int    avl_size;
+};
+
+/*
+ * The range callbacks are defined further down; declare them so the
+ * table below can refer to them.
+ */
+static __uint64_t avl64_debug_start(avl64node_t *node);
+static __uint64_t avl64_debug_end(avl64node_t *node);
+
+/*
+ * Range callbacks used by the stand-alone test tree.
+ */
+avl64ops_t avl_debug_ops = {
+       avl64_debug_start,
+       avl64_debug_end,
+};
+
+/*
+ * Start callback of the test tree: returns the avl_start of the
+ * avl_debug_node that node is embedded in.
+ */
+static __uint64_t
+avl64_debug_start(avl64node_t *node)
+{
+       /*
+        * Cast the node pointer first and only then select the member;
+        * "->" binds tighter than a cast.
+        */
+       return (__uint64_t)((struct avl_debug_node *)node)->avl_start;
+}
+
+/*
+ * End callback of the test tree: returns avl_start + avl_size of the
+ * avl_debug_node that node is embedded in.
+ */
+static __uint64_t
+avl64_debug_end(avl64node_t *node)
+{
+       /*
+        * Convert the node pointer once, up front; casting in front of
+        * "node->member" would apply the cast to the member instead.
+        */
+       struct avl_debug_node *dn = (struct avl_debug_node *)node;
+
+       return (__uint64_t)dn->avl_start + (__uint64_t)dn->avl_size;
+}
+
+/*
+ * Fixed pool of test nodes, handed out front to back and never
+ * returned.
+ */
+struct avl_debug_node  freenodes[100];
+struct avl_debug_node  *freehead = &freenodes[0];
+
+/*
+ * Take the next unused node from the pool.
+ *
+ * Returns a pointer to the AVL linkage embedded in the node, with its
+ * balance and tree pointers reset, or NULL once the pool is used up.
+ */
+static avl64node_t *
+alloc_avl64_debug_node()
+{
+       avl64node_t *np;
+
+       if (freehead >= freenodes + sizeof(freenodes) / sizeof(freenodes[0]))
+               return(NULL);
+
+       np = &freehead->avl_node;
+       freehead++;
+
+       np->avl_balance = AVL_BALANCE;
+       np->avl_parent = np->avl_forw = np->avl_back = NULL;
+       return(np);
+}
+
+/*
+ * Dump the subtree under root sideways: the forw (right) subtree comes
+ * first, then root itself indented by depth blanks, then the back
+ * (left) subtree.  Every level down adds five blanks of indentation.
+ */
+static void
+avl64_print(avl64tree_desc_t *tree, avl64node_t *root, int depth)
+{
+       int pad;
+
+       if (root == NULL)
+               return;
+
+       /* a NULL child makes the recursive call return at once */
+       avl64_print(tree, root->avl_forw, depth + 5);
+
+       for (pad = depth; pad > 0; pad--)
+               putchar((int) ' ');
+       avl64_printnode(tree, root, 1);
+
+       avl64_print(tree, root->avl_back, depth + 5);
+}
+
+/*
+ * Interactive driver of the stand-alone test build.
+ *
+ * Seeds the tree with ten ranges of length 10 starting at 100, 90, ...,
+ * 10, dumps it, and then reads commands from stdin until end of input.
+ * A command is one of the letters below followed by a number:
+ *     f <start>       look up [start, end) with avl64_findinrange(),
+ *                     end is prompted for
+ *     d <start>       as 'f', and delete the node that was found
+ *     p <number>      print the tree and the in-order list
+ *     i <start>       insert a new range, its size is prompted for
+ *     r <start>       list what avl64_findanyrange() finds for
+ *                     [start, end), end and checklen are prompted for
+ */
+int
+main()
+{
+       int             i, j;
+       avl64node_t     *np;
+       struct avl_debug_node   *dn;
+       avl64tree_desc_t        tree;
+       char            linebuf[256], cmd[256];
+
+       avl64_init_tree(&tree, &avl_debug_ops);
+
+       for (i = 100; i > 0; i = i - 10)
+       {
+               np = alloc_avl64_debug_node();
+               ASSERT(np);
+               /* the range lives in the node enclosing the linkage */
+               dn = (struct avl_debug_node *)np;
+               dn->avl_start = i;
+               dn->avl_size = 10;
+               avl64_insert(&tree, np);
+       }
+       avl64_print(&tree, tree.avl_root, 0);
+
+       for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino)
+               avl64_printnode(&tree, np, 0);
+       printf("\n");
+
+       while (1) {
+               printf("Command [fpdir] : ");
+               /* stop on end of input and on read errors alike */
+               if (fgets(linebuf, sizeof(linebuf), stdin) == NULL)
+                       break;
+               cmd[0] = '\0';
+               if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2)
+                       continue;
+               switch (cmd[0]) {
+               case 'd':
+               case 'f':
+                       printf("end of range ? ");
+                       fgets(linebuf, 256, stdin);
+                       j = atoi(linebuf);
+
+                       if (i == j) j = i+1;
+                       /*
+                        * NOTE(review): avl64_findinrange() is not among
+                        * the prototypes in avl64.h -- confirm that it is
+                        * defined elsewhere in this file.
+                        */
+                       np = avl64_findinrange(&tree,i,j);
+                       if (np) {
+                               avl64_printnode(&tree, np, 1);
+                               if (cmd[0] == 'd')
+                                       avl64_delete(&tree, np);
+                       } else
+                               printf("Cannot find %d\n", i);
+                       break;
+               case 'p':
+                       avl64_print(&tree, tree.avl_root, 0);
+                       for (np = tree.avl_firstino;
+                               np != NULL; np = np->avl_nextino)
+                                       avl64_printnode(&tree, np, 0);
+                       printf("\n");
+                       break;
+               case 'i':
+                       np = alloc_avl64_debug_node();
+                       if (np == NULL) {
+                               printf("Out of nodes\n");
+                               break;
+                       }
+                       dn = (struct avl_debug_node *)np;
+                       dn->avl_start = i;
+                       printf("size of range ? ");
+                       fgets(linebuf, 256, stdin);
+                       j = atoi(linebuf);
+
+                       dn->avl_size = j;
+                       avl64_insert(&tree, np);
+                       break;
+               case 'r': {
+                       avl64node_t     *b, *t;
+                       int             checklen;
+
+                       printf("End of range ? ");
+                       fgets(linebuf, 256, stdin);
+                       j = atoi(linebuf);
+
+                       printf("checklen 0/1 ? ");
+                       fgets(linebuf, 256, stdin);
+                       checklen = atoi(linebuf);
+
+
+                       b = avl64_findanyrange(&tree, i, j, checklen);
+                       if (b) {
+                               printf("Found something\n");
+                               t = b;
+                               while (t)  {
+                                       if (t != b &&
+                                           AVL_START(&tree, t) >= j)
+                                               break;
+                                       avl64_printnode(&tree, t, 0);
+                                       t = t->avl_nextino;
+                               }
+                               printf("\n");
+                       }
+                       break;
+                    }
+               }
+       }
+       return 0;
+}
+#endif
+
+/*
+ *     Given a tree, find value; returns the range enclosing value if
+ *     there is one.  Otherwise dir selects what is returned:
+ *     AVL_SUCCEED gives the range immediately succeeding value,
+ *     AVL_PRECEED gives the range immediately preceding value.
+ *     Returns NULL if there is no such range, or if dir is neither of
+ *     the two.
+ */
+avl64node_t *
+avl64_findadjacent(
+       register avl64tree_desc_t *tree,
+       register __uint64_t value,
+       register int            dir)
+{
+       register avl64node_t *np = tree->avl_root;
+
+       while (np) {
+               if (value < AVL_START(tree, np)) {
+                       if (np->avl_back) {
+                               np = np->avl_back;
+                               continue;
+                       }
+                       /* if we were to add node with value, would
+                        * have a growth of AVL_BACK
+                        */
+                       if (dir == AVL_SUCCEED) {
+                               /* if succeeding node is needed, this is it.
+                                */
+                               return(np);
+                       }
+                       if (dir == AVL_PRECEED) {
+                               /*
+                                * find nearest ancestor with lesser value.
+                                */
+                               np = np->avl_parent;
+                               while (np) {
+                                       if (AVL_END(tree, np) <= value)
+                                               break;
+                                       np = np->avl_parent;
+                               }
+                               return(np);
+                       }
+                       ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+                       break;
+               }
+               if (value >= AVL_END(tree, np)) {
+                       if (np->avl_forw) {
+                               np = np->avl_forw;
+                               continue;
+                       }
+                       /* if we were to add node with value, would
+                        * have a growth of AVL_FORW;
+                        */
+                       if (dir == AVL_SUCCEED) {
+                               /* we are looking for a succeeding node;
+                                * this is nextino.
+                                */
+                               return(np->avl_nextino);
+                       }
+                       if (dir == AVL_PRECEED) {
+                               /* looking for a preceding node; this is it. */
+                               return(np);
+                       }
+                       ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+                       /*
+                        * Bad dir: give up as the AVL_BACK case above
+                        * does, rather than fall through and return a
+                        * node that does not contain value.
+                        */
+                       break;
+               }
+               /* AVL_START(tree, np) <= value < AVL_END(tree, np) */
+               return(np);
+       }
+       return NULL;
+}
+
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+/*
+ *     avl_findranges:
+ *
+ *     Given range r [start, end), report the run of tree ranges that
+ *     falls within r.  At return, *startp and *endp point to the first
+ *     and last element of that run, or are both NULL if nothing at or
+ *     after start begins before end.  Elements in *startp ... *endp
+ *     are in sort order, and can be walked using avl_nextino.
+ */
+
+void
+avl64_findranges(
+       register avl64tree_desc_t *tree,
+       register __uint64_t start,
+       register __uint64_t end,
+       avl64node_t             **startp,
+       avl64node_t             **endp)
+{
+       register avl64node_t *first;
+       register avl64node_t *next;
+
+       first = avl64_findadjacent(tree, start, AVL_SUCCEED);
+
+       /*
+        * Either nothing succeeds start, or what follows start lies
+        * entirely after end.
+        */
+       if (first == NULL || end <= AVL_START(tree, first)) {
+               *startp = NULL;
+               *endp = NULL;
+               return;
+       }
+
+       *startp = first;
+
+       /* see if end is in this region itself */
+       if (end <= AVL_END(tree, first)) {
+               *endp = first;
+               return;
+       }
+
+       /* ... or in the gap before the next region, if there is one */
+       next = first->avl_nextino;
+       if (next == NULL || end <= AVL_START(tree, next)) {
+               *endp = first;
+               return;
+       }
+
+       /*
+        * have to munge for end.
+        * note: have to look for (end - 1), since findadjacent looks
+        * for the exact value and does not know that end is one more
+        * than the last value of interest; thus feed it one less.
+        */
+       *endp = avl64_findadjacent(tree, (end-1), AVL_PRECEED);
+       ASSERT(*endp);
+}
+
+#endif /* AVL_FUTURE_ENHANCEMENTS */
diff --git a/repair/avl64.h b/repair/avl64.h
new file mode 100644 (file)
index 0000000..26ed977
--- /dev/null
@@ -0,0 +1,151 @@
+/**************************************************************************
+ *                                                                       *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *                                                                       *
+ **************************************************************************/
+#ifndef __XR_AVL64_H__
+#define __XR_AVL64_H__
+
+#include <sys/types.h>
+
+/*
+ * Linkage of one node of an avl64 tree.  The range a node covers is
+ * not stored here; the avl code reads it through the avl_start and
+ * avl_end callbacks of the tree (see AVL_START and AVL_END).
+ */
+typedef struct avl64node {
+       struct  avl64node       *avl_forw;      /* pointer to right child  (> parent) */
+       struct  avl64node *avl_back;    /* pointer to left child  (< parent) */
+       struct  avl64node *avl_parent;  /* parent pointer */
+       struct  avl64node *avl_nextino; /* next in-order; NULL terminated list*/
+       char             avl_balance;   /* AVL_BACK, AVL_BALANCE or AVL_FORW */
+} avl64node_t;
+
+/*
+ * avl-tree operations: callbacks through which the avl code obtains
+ * the start and the end of the range belonging to a node.  They are
+ * invoked via the AVL_START and AVL_END macros.
+ */
+typedef struct avl64ops {
+       __uint64_t      (*avl_start)(avl64node_t *);
+       __uint64_t      (*avl_end)(avl64node_t *);
+} avl64ops_t;
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_START
+#define        AVL_START(tree, n)      (*(tree)->avl_ops->avl_start)(n)
+#define        AVL_END(tree, n)        (*(tree)->avl_ops->avl_end)(n)
+#endif
+
+/*
+ * tree descriptor:
+ *     root points to the root of the tree.
+ *     firstino points to the first in the ordered list.
+ *     ops points to the range callbacks used for every node.
+ *     All three are set up by avl64_init_tree().
+ */
+typedef struct avl64tree_desc {
+       avl64node_t     *avl_root;
+       avl64node_t     *avl_firstino;
+       avl64ops_t      *avl_ops;
+} avl64tree_desc_t;
+
+/* possible values for avl_balance */
+
+#define AVL_BACK       1
+#define AVL_BALANCE    0
+#define AVL_FORW       2
+
+/*
+ * 'Exported' avl tree routines
+ */
+avl64node_t
+*avl64_insert(
+       avl64tree_desc_t *tree,
+       avl64node_t *newnode);
+
+void
+avl64_delete(
+       avl64tree_desc_t *tree,
+       avl64node_t *np);
+
+void
+avl64_insert_immediate(
+       avl64tree_desc_t *tree,
+       avl64node_t *afterp,
+       avl64node_t *newnode);
+       
+void
+avl64_init_tree(
+       avl64tree_desc_t  *tree,
+       avl64ops_t *ops);
+
+avl64node_t *
+avl64_findrange(
+       avl64tree_desc_t *tree,
+       __uint64_t value);
+
+avl64node_t *
+avl64_find(
+       avl64tree_desc_t *tree,
+       __uint64_t value);
+
+avl64node_t *
+avl64_findanyrange(
+       avl64tree_desc_t *tree,
+       __uint64_t      start,
+       __uint64_t      end,
+       int     checklen);
+
+
+avl64node_t *
+avl64_findadjacent(
+       avl64tree_desc_t *tree,
+       __uint64_t      value,
+       int             dir);
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+void
+avl64_findranges(
+       register avl64tree_desc_t *tree,
+       register __uint64_t     start,
+       register __uint64_t     end,
+       avl64node_t             **startp,
+       avl64node_t             **endp);
+#endif
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_PRECEED
+#define AVL_PRECEED    0x1
+#define AVL_SUCCEED    0x2
+
+#define AVL_INCLUDE_ZEROLEN    0x0000
+#define AVL_EXCLUDE_ZEROLEN    0x0001
+#endif
+
+#endif /* __XR_AVL64_H__ */
diff --git a/repair/bmap.c b/repair/bmap.c
new file mode 100644 (file)
index 0000000..47c8bbf
--- /dev/null
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "err_protos.h"
+#include "bmap.h"
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Append an extent to the block entry.
+ *
+ * Grows *entp by c blocks and records filesystem blocks b, b + 1, ...,
+ * b + c - 1 after the blocks already present.  realloc() may move the
+ * entry; *entp is updated to its new location.  If the allocation
+ * fails a warning is issued and *entp is set to NULL.
+ *
+ * NOTE(review): on failure the old entry is not freed -- confirm
+ * whether callers keep another reference to it.
+ */
+void
+blkent_append(
+       blkent_t        **entp,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       blkent_t        *ent;
+       size_t          size;
+       int             i;
+
+       ent = *entp;
+       size = BLKENT_SIZE(c + ent->nblks);
+       if ((*entp = ent = realloc(ent, size)) == NULL) {
+               /* size is a size_t, which %u does not describe */
+               do_warn("realloc failed in blkent_append (%lu bytes)\n",
+                       (unsigned long)size);
+               return;
+       }
+       for (i = 0; i < c; i++)
+               ent->blks[ent->nblks + i] = b + i;
+       ent->nblks += c;
+}
+
+/*
+ * Make a new block entry.
+ *
+ * The entry starts at file offset o and maps c consecutive blocks to
+ * filesystem blocks b, b + 1, ..., b + c - 1.  Returns NULL, after
+ * issuing a warning, if the entry cannot be allocated.
+ */
+blkent_t *
+blkent_new(
+       xfs_dfiloff_t   o,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       blkent_t        *fresh;
+       int             idx;
+
+       fresh = malloc(BLKENT_SIZE(c));
+       if (fresh == NULL) {
+               do_warn("malloc failed in blkent_new (%u bytes)\n",
+                       BLKENT_SIZE(c));
+               return NULL;
+       }
+
+       fresh->startoff = o;
+       fresh->nblks = c;
+       idx = 0;
+       while (idx < c) {
+               fresh->blks[idx] = b + idx;
+               idx++;
+       }
+       return fresh;
+}
+
+/*
+ * Prepend an extent to the block entry.
+ *
+ * Builds a replacement entry that starts c offsets earlier and maps
+ * those c offsets to filesystem blocks b, b + 1, ..., b + c - 1,
+ * followed by the blocks of the old entry.  The old entry is freed and
+ * *entp is pointed at the replacement.  If the allocation fails a
+ * warning is issued and *entp is set to NULL; the old entry is not
+ * freed in that case.
+ */
+void
+blkent_prepend(
+       blkent_t        **entp,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       int             i;
+       blkent_t        *newent;
+       blkent_t        *oldent;
+
+       oldent = *entp;
+       if ((newent = malloc(BLKENT_SIZE(oldent->nblks + c))) == NULL) {
+               do_warn("malloc failed in blkent_prepend (%u bytes)\n",
+                       BLKENT_SIZE(oldent->nblks + c));
+               *entp = newent;
+               return;
+       }
+       newent->nblks = oldent->nblks + c;
+       newent->startoff = oldent->startoff - c;
+       /*
+        * Block i of the new extent is b + i, as in blkent_new() and
+        * blkent_append(); b + c would give every slot the same block.
+        */
+       for (i = 0; i < c; i++)
+               newent->blks[i] = b + i;
+       for (; i < oldent->nblks + c; i++)
+               newent->blks[i] = oldent->blks[i - c];
+       free(oldent);
+       *entp = newent;
+}
+
+/*
+ * Allocate a block map.
+ *
+ * The map gets room for nex entries, or for one entry if nex is less
+ * than 1, and starts out with no entries in use.  Returns NULL, after
+ * issuing a warning, if the allocation fails.
+ */
+blkmap_t *
+blkmap_alloc(
+       xfs_extnum_t    nex)
+{
+       blkmap_t        *map;
+
+       nex = (nex < 1) ? 1 : nex;
+
+       map = malloc(BLKMAP_SIZE(nex));
+       if (map == NULL) {
+               do_warn("malloc failed in blkmap_alloc (%u bytes)\n",
+                       BLKMAP_SIZE(nex));
+               return NULL;
+       }
+
+       map->nents = 0;
+       map->naents = nex;
+       return map;
+}
+
+/*
+ * Free a block map: every entry in use, then the map itself.
+ * A NULL map is accepted and ignored.
+ */
+void
+blkmap_free(
+       blkmap_t        *blkmap)
+{
+       xfs_extnum_t    idx;
+
+       if (blkmap != NULL) {
+               for (idx = 0; idx < blkmap->nents; idx++)
+                       free(blkmap->ents[idx]);
+               free(blkmap);
+       }
+}
+
+/*
+ * Get one entry from a block map.
+ *
+ * Returns the filesystem block recorded for file offset o by the first
+ * entry that covers o, or NULLDFSBNO if no entry does.
+ */
+xfs_dfsbno_t
+blkmap_get(
+       blkmap_t        *blkmap,
+       xfs_dfiloff_t   o)
+{
+       blkent_t        *cur;
+       int             idx;
+
+       for (idx = 0; idx < blkmap->nents; idx++) {
+               cur = blkmap->ents[idx];
+               if (o < cur->startoff)
+                       continue;
+               if (o >= cur->startoff + cur->nblks)
+                       continue;
+               return cur->blks[o - cur->startoff];
+       }
+       return NULLDFSBNO;
+}
+
+/*
+ * Get a chunk of entries from a block map.
+ *
+ * Collects the mappings of file offsets [o, o + nb) into an array of
+ * extents, merging offsets that are contiguous both in the file and on
+ * disk into a single extent.  The array is obtained with realloc() and
+ * handed back through *bmpp (presumably for the caller to free); the
+ * number of extents in it is returned.  *bmpp is NULL and the result 0
+ * if nothing in the range is mapped.
+ *
+ * If the array cannot be grown a warning is issued, what has been
+ * collected so far is released, *bmpp is set to NULL and 0 is
+ * returned.
+ */
+int
+blkmap_getn(
+       blkmap_t        *blkmap,
+       xfs_dfiloff_t   o,
+       xfs_dfilblks_t  nb,
+       bmap_ext_t      **bmpp)
+{
+       bmap_ext_t      *bmp;
+       bmap_ext_t      *grown;
+       blkent_t        *ent;
+       xfs_dfiloff_t   ento;
+       blkent_t        **entp;
+       int             i;
+       int             nex;
+
+       for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+            i < blkmap->nents;
+            i++, entp++) {
+               ent = *entp;
+               if (ent->startoff >= o + nb)
+                       break;
+               if (ent->startoff + ent->nblks <= o)
+                       continue;
+               for (ento = ent->startoff;
+                    ento < ent->startoff + ent->nblks && ento < o + nb;
+                    ento++) {
+                       if (ento < o)
+                               continue;
+                       if (bmp &&
+                           bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+                                   ento &&
+                           bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+                                   ent->blks[ento - ent->startoff])
+                               bmp[nex - 1].blockcount++;
+                       else {
+                               /*
+                                * Grow through a temporary so that the
+                                * extents gathered so far are not lost
+                                * track of when realloc() fails.
+                                */
+                               grown = realloc(bmp,
+                                               (nex + 1) * sizeof(*bmp));
+                               if (grown == NULL) {
+                                       do_warn("realloc failed in blkmap_getn"
+                                               " (%lu bytes)\n",
+                                               (unsigned long)
+                                               ((nex + 1) * sizeof(*bmp)));
+                                       free(bmp);
+                                       *bmpp = NULL;
+                                       return 0;
+                               }
+                               bmp = grown;
+                               nex++;
+                               bmp[nex - 1].startoff = ento;
+                               bmp[nex - 1].startblock =
+                                       ent->blks[ento - ent->startoff];
+                               bmp[nex - 1].blockcount = 1;
+                               bmp[nex - 1].flag = 0;
+                       }
+               }
+       }
+       *bmpp = bmp;
+       return nex;
+}
+
+/*
+ * Make a block map larger.
+ *
+ * Inserts newent into the map in front of the slot entp points at;
+ * entp must point into the ents array of *blkmapp.  The entries from
+ * that slot onwards move up by one.  If every allocated slot is in use
+ * the map is first enlarged by one slot, which may move it; *blkmapp is
+ * updated accordingly.  If that allocation fails a warning is issued,
+ * *blkmapp is set to NULL and newent is not inserted.
+ */
+void
+blkmap_grow(
+       blkmap_t        **blkmapp,
+       blkent_t        **entp,
+       blkent_t        *newent)
+{
+       blkmap_t        *blkmap;
+       size_t          size;
+       int             i;
+       int             idx;
+
+       blkmap = *blkmapp;
+       /* remember the slot by index; realloc() may move the array */
+       idx = (int)(entp - blkmap->ents);
+       if (blkmap->naents == blkmap->nents) {
+               size = BLKMAP_SIZE(blkmap->nents + 1);
+               if ((*blkmapp = blkmap = realloc(blkmap, size)) == NULL) {
+                       /* size is a size_t, which %u does not describe */
+                       do_warn("realloc failed in blkmap_grow (%lu bytes)\n",
+                               (unsigned long)size);
+                       return;
+               }
+               blkmap->naents++;
+       }
+       for (i = blkmap->nents; i > idx; i--)
+               blkmap->ents[i] = blkmap->ents[i - 1];
+       blkmap->ents[idx] = newent;
+       blkmap->nents++;
+}
+
+/*
+ * Return the last offset in a block map: startoff + nblks of the final
+ * entry, or NULLDFILOFF when the map holds no entries.
+ */
+xfs_dfiloff_t
+blkmap_last_off(
+       blkmap_t        *blkmap)
+{
+       blkent_t        *tail;
+
+       if (blkmap->nents == 0)
+               return NULLDFILOFF;
+
+       tail = blkmap->ents[blkmap->nents - 1];
+       return tail->startoff + tail->nblks;
+}
+
+/*
+ * Return the next offset in a block map.
+ *
+ * *t is the caller's cursor: the index of the entry that holds o.  Passing
+ * o == NULLDFILOFF starts the walk, resetting *t to 0 and returning the
+ * first entry's start offset.  NULLDFILOFF is returned when the map is
+ * empty or when o is the last offset of the last entry.
+ */
+xfs_dfiloff_t
+blkmap_next_off(
+       blkmap_t        *blkmap,
+       xfs_dfiloff_t   o,
+       int             *t)
+{
+       blkent_t        *cur;
+       int             idx;
+
+       if (blkmap->nents == 0)
+               return NULLDFILOFF;
+
+       /* start of iteration: hand back the very first offset */
+       if (o == NULLDFILOFF) {
+               *t = 0;
+               return blkmap->ents[0]->startoff;
+       }
+
+       idx = *t;
+       cur = blkmap->ents[idx];
+
+       /* still inside the current entry */
+       if (o < cur->startoff + cur->nblks - 1)
+               return o + 1;
+
+       /* current entry exhausted: step to the following one, if any */
+       if (idx + 1 >= blkmap->nents)
+               return NULLDFILOFF;
+       *t = idx + 1;
+       return blkmap->ents[idx + 1]->startoff;
+}
+
+/*
+ * Set a block value in a block map.
+ *
+ * Records block b for file offset o in *blkmapp.  The entries are walked
+ * in array order and the first one that matches decides what happens:
+ *   - o lies more than one block before the entry: a new one-block entry
+ *     is inserted in front of it with blkmap_grow();
+ *   - o is the offset immediately before the entry: b is prepended;
+ *   - o falls inside the entry: the stored block is overwritten;
+ *   - o is the offset immediately after the entry: b is appended and, if
+ *     the grown entry now reaches the start of the following entry, that
+ *     entry is appended onto this one and removed with blkmap_shrink().
+ * If no entry matches, a new one-block entry is added at the end.
+ * *blkmapp may be changed by blkmap_grow().
+ *
+ * NOTE(review): the merge step passes only nextent->blks[0] and
+ * nextent->nblks to blkent_append(), so it presumably assumes the blocks
+ * of the following entry are consecutive -- confirm against
+ * blkent_append().
+ * NOTE(review): "ent->startoff - 1" looks like it would wrap if
+ * xfs_dfiloff_t is unsigned and startoff is 0 -- confirm.
+ */
+void
+blkmap_set_blk(
+       blkmap_t        **blkmapp,
+       xfs_dfiloff_t   o,
+       xfs_dfsbno_t    b)
+{
+       blkmap_t        *blkmap;
+       blkent_t        *ent;
+       blkent_t        **entp;
+       blkent_t        *nextent;
+
+       blkmap = *blkmapp;
+       for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+               ent = *entp;
+               if (o < ent->startoff - 1) {    /* gap before this entry */
+                       ent = blkent_new(o, b, 1);
+                       blkmap_grow(blkmapp, entp, ent);
+                       return;
+               }
+               if (o == ent->startoff - 1) {   /* adjacent in front */
+                       blkent_prepend(entp, b, 1);
+                       return;
+               }
+               if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+                       ent->blks[o - ent->startoff] = b;       /* overwrite */
+                       return;
+               }
+               if (o > ent->startoff + ent->nblks)     /* beyond; try next */
+                       continue;
+               blkent_append(entp, b, 1);      /* o == startoff + nblks */
+               if (entp == &blkmap->ents[blkmap->nents - 1])
+                       return;
+               ent = *entp;    /* re-read after blkent_append() */
+               nextent = entp[1];
+               if (ent->startoff + ent->nblks < nextent->startoff)
+                       return;
+               blkent_append(entp, nextent->blks[0], nextent->nblks);
+               blkmap_shrink(blkmap, &entp[1]);
+               return;
+       }
+       ent = blkent_new(o, b, 1);
+       blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Set an extent into a block map.
+ *
+ * Adds the c blocks starting at block b for file offsets o .. o + c - 1.
+ * An empty map simply receives the extent as its first entry.  Otherwise
+ * the extent is compared against the end of the last entry: if it starts
+ * exactly there it is appended to that entry, if it starts beyond it a
+ * new entry is added at the end of the map, and in every other case the
+ * blocks are set one at a time through blkmap_set_blk().
+ */
+void
+blkmap_set_ext(
+       blkmap_t        **blkmapp,
+       xfs_dfiloff_t   o,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       blkmap_t        *map = *blkmapp;
+       blkent_t        **lastp;
+       xfs_extnum_t    n;
+
+       if (map->nents == 0) {
+               map->ents[0] = blkent_new(o, b, c);
+               map->nents = 1;
+               return;
+       }
+
+       lastp = &map->ents[map->nents - 1];
+       if ((*lastp)->startoff + (*lastp)->nblks == o) {
+               /* contiguous with the tail of the map */
+               blkent_append(lastp, b, c);
+       } else if ((*lastp)->startoff + (*lastp)->nblks < o) {
+               /* leaves a hole after the tail: needs its own entry */
+               blkmap_grow(blkmapp, lastp + 1, blkent_new(o, b, c));
+       } else {
+               /* lands somewhere before the tail's end: go block by block */
+               for (n = 0; n < c; n++)
+                       blkmap_set_blk(blkmapp, o + n, b + n);
+       }
+}
+
+/*
+ * Make a block map smaller.
+ *
+ * Frees the entry that entp points to (entp must point into
+ * blkmap->ents) and closes the gap by moving every later entry pointer
+ * down one slot.  The allocation itself is not resized; only nents is
+ * reduced.
+ */
+void
+blkmap_shrink(
+       blkmap_t        *blkmap,
+       blkent_t        **entp)
+{
+       int             i;
+       int             idx;
+
+       free(*entp);
+       idx = (int)(entp - blkmap->ents);
+       /*
+        * Shift the tail down over the freed slot.  Copying in the other
+        * direction would spread the freed pointer through the rest of the
+        * array and drop the live entries.
+        */
+       for (i = idx + 1; i < blkmap->nents; i++)
+               blkmap->ents[i - 1] = blkmap->ents[i];
+       blkmap->nents--;
+}
diff --git a/repair/bmap.h b/repair/bmap.h
new file mode 100644 (file)
index 0000000..0b184ee
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Block map entry.
+ *
+ * Covers nblks consecutive file offsets beginning at startoff; blks[i]
+ * holds the block recorded for file offset startoff + i.  blks is
+ * declared with one element but is used as a variable-length trailing
+ * array: BLKENT_SIZE(n) is the byte size of an entry with room for n
+ * blocks.
+ */
+typedef struct blkent {
+       xfs_dfiloff_t   startoff;       /* first file offset covered */
+       xfs_dfilblks_t  nblks;          /* number of offsets covered */
+       xfs_dfsbno_t    blks[1];        /* one block number per offset */
+} blkent_t;
+#define        BLKENT_SIZE(n)  \
+       (offsetof(blkent_t, blks) + (sizeof(xfs_dfsbno_t) * (n)))
+
+/*
+ * Block map.
+ *
+ * An array of pointers to block map entries.  ents is declared with one
+ * element but is used as a variable-length trailing array:
+ * BLKMAP_SIZE(n) is the byte size of a map with room for n entry
+ * pointers.
+ */
+typedef        struct blkmap {
+       int             naents;         /* entry slots allocated */
+       int             nents;          /* entry slots in use */
+       blkent_t        *ents[1];       /* the entries themselves */
+} blkmap_t;
+#define        BLKMAP_SIZE(n)  \
+       (offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
+
+/*
+ * Extent descriptor.
+ *
+ * One run of blockcount blocks: file offset startoff maps to block
+ * startblock, and each following offset to the following block.
+ * blkmap_getn() returns an array of these.
+ */
+typedef struct bmap_ext {
+       xfs_dfiloff_t   startoff;       /* first file offset of the run */
+       xfs_dfsbno_t    startblock;     /* block backing startoff */
+       xfs_dfilblks_t  blockcount;     /* length of the run in blocks */
+       int             flag;
+} bmap_ext_t;
+
+void           blkent_append(blkent_t **entp, xfs_dfsbno_t b,
+                             xfs_dfilblks_t c);
+blkent_t       *blkent_new(xfs_dfiloff_t o, xfs_dfsbno_t b, xfs_dfilblks_t c);
+void           blkent_prepend(blkent_t **entp, xfs_dfsbno_t b,
+                              xfs_dfilblks_t c);
+blkmap_t       *blkmap_alloc(xfs_extnum_t);
+void           blkmap_free(blkmap_t *blkmap);
+xfs_dfsbno_t   blkmap_get(blkmap_t *blkmap, xfs_dfiloff_t o);
+int            blkmap_getn(blkmap_t *blkmap, xfs_dfiloff_t o,
+                           xfs_dfilblks_t nb, bmap_ext_t **bmpp);
+void           blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+                           blkent_t *newent);
+xfs_dfiloff_t  blkmap_last_off(blkmap_t *blkmap);
+xfs_dfiloff_t  blkmap_next_off(blkmap_t *blkmap, xfs_dfiloff_t o, int *t);
+void           blkmap_set_blk(blkmap_t **blkmapp, xfs_dfiloff_t o,
+                              xfs_dfsbno_t b);
+void           blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
+                              xfs_dfsbno_t b, xfs_dfilblks_t c);
+void           blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
new file mode 100644 (file)
index 0000000..90d2e9f
--- /dev/null
@@ -0,0 +1,1178 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dinode.h"
+#include "versions.h"
+
+/*
+ * Validate one candidate inode block and return how many of the inodes
+ * in it look good.  Each dinode is checked with
+ * verify_uncertain_dinode(), so only the basic inode info is examined,
+ * not the forks.  Returns 0 if the block cannot be read.
+ */
+
+int
+check_aginode_block(xfs_mount_t        *mp,
+                       xfs_agnumber_t  agno,
+                       xfs_agblock_t   agbno)
+{
+       xfs_buf_t       *bp;
+       xfs_dinode_t    *dip;
+       int             good = 0;
+       int             slot;
+
+       /*
+        * Reading these possible inode blocks one at a time is fine:
+        * they do not belong to known inodes (otherwise the incore inode
+        * tree would already know about them and we would not be here).
+        */
+       bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+                       XFS_FSB_TO_BB(mp, 1), 0);
+       if (bp == NULL) {
+               do_warn("cannot read agbno (%u/%u), disk block %lld\n", agno,
+                       agbno, (xfs_daddr_t)XFS_AGB_TO_DADDR(mp, agno, agbno));
+               return(0);
+       }
+
+       /* a zero return from verify_uncertain_dinode() means "good" */
+       for (slot = 0; slot < mp->m_sb.sb_inopblock; slot++)  {
+               dip = XFS_MAKE_IPTR(mp, bp, slot);
+               if (verify_uncertain_dinode(mp, dip, agno,
+                               XFS_OFFBNO_TO_AGINO(mp, agbno, slot)) == 0)
+                       good++;
+       }
+
+       libxfs_putbuf(bp);
+       return(good);
+}
+
+/*
+ * Same check as check_aginode_block(), addressed by inode number: the
+ * block examined is the one that holds ino.
+ */
+int
+check_inode_block(xfs_mount_t          *mp,
+                       xfs_ino_t       ino)
+{
+       xfs_agnumber_t  agno = XFS_INO_TO_AGNO(mp, ino);
+       xfs_agblock_t   agbno = XFS_INO_TO_AGBNO(mp, ino);
+
+       return(check_aginode_block(mp, agno, agbno));
+}
+
+/*
+ * tries to establish if the inode really exists in a valid
+ * inode chunk.  returns number of new inodes if things are good
+ * and 0 if bad.  start is the start of the discovered inode chunk.
+ * routine assumes that ino is a legal inode number
+ * (verified by verify_inum()).  If the inode chunk turns out
+ * to be good, this routine will put the inode chunk into
+ * the good inode chunk tree if required.
+ *
+ * the verify_(ag)inode* family of routines are utility
+ * routines called by check_uncertain_aginodes() and
+ * process_uncertain_aginodes().
+ *
+ * *start_ino is set to NULLFSINO on entry and to the first inode of
+ * the discovered chunk on success.  An inode whose block is at or past
+ * the end of its AG is rejected immediately.  After that one of three
+ * paths is taken:
+ *
+ * 1. XFS_IALLOC_BLOCKS(mp) == 1 (a block holds one or more whole
+ *    chunks): the single block is checked with check_inode_block().
+ *    If the block is already claimed it is marked XR_E_MULT and 0 is
+ *    returned; otherwise it is marked XR_E_INO.  An inode record is
+ *    found or created for each of the chunks_pblock chunks in the
+ *    block, the inode that led here is marked used, and
+ *    chunks_pblock * XFS_INODES_PER_CHUNK is returned.
+ *
+ * 2. fs_aligned_inodes: the chunk must start at the inode's block
+ *    rounded down to fs_ino_alignment.  Every block of the chunk is
+ *    checked; if the chunk would run past the end of the AG, or too
+ *    few inodes verify, 0 is returned.  Otherwise a record is created
+ *    and XFS_INODES_PER_CHUNK is returned.  This path does not call
+ *    set_agbno_state().
+ *
+ * 3. otherwise: a search range around the inode's block is set up and
+ *    narrowed using known inode records, then blocks are scanned
+ *    backward and forward from the inode's block until one with no
+ *    good inodes or a range boundary is hit.  The run is trimmed to a
+ *    multiple of XFS_IALLOC_BLOCKS(mp), checked against the block
+ *    state map, entered into the tree, and its blocks are marked
+ *    XR_E_INO.  The value returned is the number of good inodes
+ *    counted during the scan.
+ *
+ * NOTE(review): the backward scan in path 3 loops while
+ * cur_agbno >= start_agbno and then uses cur_agbno + 1; this looks
+ * like it relies on start_agbno being at least 1 if xfs_agblock_t is
+ * unsigned -- confirm.
+ */
+int
+verify_inode_chunk(xfs_mount_t         *mp,
+                       xfs_ino_t       ino,
+                       xfs_ino_t       *start_ino)
+{
+       xfs_agnumber_t  agno;
+       xfs_agino_t     agino;
+       xfs_agino_t     start_agino;
+       xfs_agblock_t   agbno;
+       xfs_agblock_t   start_agbno = 0;
+       xfs_agblock_t   end_agbno;
+       xfs_agblock_t   max_agbno;
+       xfs_agblock_t   cur_agbno;
+       xfs_agblock_t   chunk_start_agbno;
+       xfs_agblock_t   chunk_stop_agbno;
+       ino_tree_node_t *irec_before_p = NULL;
+       ino_tree_node_t *irec_after_p = NULL;
+       ino_tree_node_t *irec_p;
+       ino_tree_node_t *irec_next_p;
+       int             irec_cnt;
+       int             ino_cnt = 0;
+       int             num_blks;
+       int             i;
+       int             j;
+       int             state;
+
+        agno = XFS_INO_TO_AGNO(mp, ino);
+        agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_INO_TO_AGBNO(mp, ino);
+       *start_ino = NULLFSINO;
+
+       ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+       if (agno == mp->m_sb.sb_agcount - 1)
+               max_agbno = mp->m_sb.sb_dblocks -
+                       (xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno;
+       else
+               max_agbno = mp->m_sb.sb_agblocks;
+
+       /*
+        * is the inode beyond the end of the AG?
+        */
+       if (agbno >= max_agbno)
+               return(0);
+
+       /*
+        * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
+        * (multiple chunks per block)
+        */
+       if (XFS_IALLOC_BLOCKS(mp) == 1)  {
+               if (agbno > max_agbno)  /* cannot be true after the >= test above */
+                       return(0);
+
+               if (check_inode_block(mp, ino) == 0)
+                       return(0);
+
+               switch (state = get_agbno_state(mp, agno, agbno))  {
+               case XR_E_INO:
+                       do_warn("uncertain inode block %d/%d already known\n",
+                               agno, agbno);
+                       break;
+               case XR_E_UNKNOWN:
+               case XR_E_FREE1:
+               case XR_E_FREE:
+                       set_agbno_state(mp, agno, agbno, XR_E_INO);
+                       break;
+               case XR_E_MULT:
+               case XR_E_INUSE:
+               case XR_E_INUSE_FS:
+               case XR_E_FS_MAP:
+                       /*
+                        * if block is already claimed, forget it.
+                        */
+                       do_warn(
+                           "inode block %d/%d multiply claimed, (state %d)\n",
+                               agno, agbno, state);
+                       set_agbno_state(mp, agno, agbno, XR_E_MULT);
+                       return(0);
+               default:
+                       do_warn("inode block %d/%d bad state, (state %d)\n",
+                               agno, agbno, state);
+                       set_agbno_state(mp, agno, agbno, XR_E_INO);
+                       break;
+               }
+
+               start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
+               *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+               /*
+                * put new inode record(s) into inode tree
+                */
+               for (j = 0; j < chunks_pblock; j++)  {
+                       if ((irec_p = find_inode_rec(agno, start_agino))
+                                       == NULL)  {
+                               irec_p = set_inode_free_alloc(agno,
+                                                       start_agino);
+                               for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+                                       set_inode_free(irec_p, i);
+                       }
+                       if (start_agino <= agino && agino <
+                                       start_agino + XFS_INODES_PER_CHUNK)
+                               set_inode_used(irec_p, agino - start_agino);
+
+                       start_agino += XFS_INODES_PER_CHUNK;
+                       ino_cnt += XFS_INODES_PER_CHUNK;
+               }
+
+               return(ino_cnt);
+       } else if (fs_aligned_inodes)  {
+               /*
+                * next easy case -- aligned inode filesystem.
+                * just check out the chunk
+                */
+               start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
+                                       fs_ino_alignment);
+               end_agbno = start_agbno + XFS_IALLOC_BLOCKS(mp);
+
+               /*
+                * if this fs has aligned inodes but the end of the
+                * chunk is beyond the end of the ag, this is a bad
+                * chunk
+                */
+               if (end_agbno > max_agbno)
+                       return(0);
+
+               /*
+                * check out all blocks in chunk
+                */
+               ino_cnt = 0;
+               for (cur_agbno = start_agbno; cur_agbno < end_agbno;
+                                               cur_agbno++)  {
+                       ino_cnt += check_aginode_block(mp, agno, cur_agbno);
+               }
+
+               /*
+                * if we lose either 2 blocks worth of inodes or >25% of
+                * the chunk, just forget it.
+                */
+               if (ino_cnt < XFS_INODES_PER_CHUNK - 2 * mp->m_sb.sb_inopblock
+                               || ino_cnt < XFS_INODES_PER_CHUNK - 16)
+                       return(0);
+
+               /*
+                * ok, put the record into the tree.  we know that it's
+                * not already there since the inode is guaranteed
+                * not to be in the tree.
+                */
+               start_agino = XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0);
+               *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+               irec_p = set_inode_free_alloc(agno,
+                               XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0));
+
+               for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+                       set_inode_free(irec_p, i);
+
+               ASSERT(start_agino <= agino &&
+                               start_agino + XFS_INODES_PER_CHUNK > agino);
+
+               set_inode_used(irec_p, agino - start_agino);
+
+               return(XFS_INODES_PER_CHUNK);
+       }
+
+       /*
+        * hard case -- pre-6.3 filesystem.
+        * set default start/end agbnos and ensure agbnos are legal.
+        * we're setting a range [start_agbno, end_agbno) such that
+        * a discovered inode chunk completely within that range
+        * would include the inode passed into us.
+        */
+       if (XFS_IALLOC_BLOCKS(mp) > 1)  {
+               if (agino > XFS_IALLOC_INODES(mp))
+                       start_agbno = agbno - XFS_IALLOC_BLOCKS(mp) + 1;
+               else
+                       start_agbno = 1;
+       }
+
+       end_agbno = agbno + XFS_IALLOC_BLOCKS(mp);
+
+       if (end_agbno > max_agbno)
+               end_agbno = max_agbno;
+
+       /*
+        * search tree for known inodes within +/- 1 inode chunk range
+        */
+       irec_before_p = irec_after_p = NULL;
+
+       find_inode_rec_range(agno, XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0),
+               XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
+               &irec_before_p, &irec_after_p);
+
+       /*
+        * if we have known inode chunks in our search range, establish
+        * their start and end-points to tighten our search range.  range
+        * is [start, end) -- e.g. max/end agbno is one beyond the
+        * last block to be examined.  the avl routines work this way.
+        */
+       if (irec_before_p)  {
+               /*
+                * only one inode record in the range, move one boundary in
+                */
+               if (irec_before_p == irec_after_p)  {
+                       if (irec_before_p->ino_startnum < agino)
+                               start_agbno = XFS_AGINO_TO_AGBNO(mp,
+                                               irec_before_p->ino_startnum +
+                                               XFS_INODES_PER_CHUNK);
+                       else
+                               end_agbno = XFS_AGINO_TO_AGBNO(mp,
+                                               irec_before_p->ino_startnum);
+               }
+
+               /*
+                * find the start of the gap in the search range (which
+                * should contain our unknown inode).  if the only irec
+                * within +/- 1 chunks starts after the inode we're
+                * looking for, skip this stuff since the end_agbno
+                * of the range has already been trimmed in to not
+                * include that irec.
+                */
+               if (irec_before_p->ino_startnum < agino)  {
+                       irec_p = irec_before_p;
+                       irec_next_p = next_ino_rec(irec_p);
+
+                       while(irec_next_p != NULL &&
+                               irec_p->ino_startnum + XFS_INODES_PER_CHUNK ==
+                                       irec_next_p->ino_startnum)  {
+                               irec_p = irec_next_p;
+                               irec_next_p = next_ino_rec(irec_next_p);
+                       }
+
+                       start_agbno = XFS_AGINO_TO_AGBNO(mp,
+                                               irec_p->ino_startnum) +
+                                               XFS_IALLOC_BLOCKS(mp);
+
+                       /*
+                        * we know that the inode we're trying to verify isn't
+                        * in an inode chunk so the next ino_rec marks the end
+                        * of the gap -- is it within the search range?
+                        */
+                       if (irec_next_p != NULL &&
+                                       agino + XFS_IALLOC_INODES(mp) >=
+                                               irec_next_p->ino_startnum)
+                               end_agbno = XFS_AGINO_TO_AGBNO(mp,
+                                               irec_next_p->ino_startnum);
+               }
+
+               ASSERT(start_agbno < end_agbno);
+       }
+
+       /*
+        * if the gap is too small to contain a chunk, we lose.
+        * this means that inode chunks known to be good surround
+        * the inode in question and that the space between them
+        * is too small for a legal inode chunk
+        */
+       if (end_agbno - start_agbno < XFS_IALLOC_BLOCKS(mp))
+               return(0);
+
+       /*
+        * now grunge around the disk, start at the inode block and
+        * go in each direction until you hit a non-inode block or
+        * run into a range boundary.  A non-inode block is block
+        * with *no* good inodes in it.  Unfortunately, we can't
+        * co-opt bad blocks into inode chunks (which might take
+        * care of disk blocks that turn into zeroes) because the
+        * filesystem could very well allocate two inode chunks
+        * with a one block file in between and we'd zap the file.
+        * We're better off just losing the rest of the
+        * inode chunk instead.
+        */
+       for (cur_agbno = agbno; cur_agbno >= start_agbno; cur_agbno--)  {
+               /*
+                * if the block has no inodes, it's a bad block so
+                * break out now without decrementing cur_agbno so
+                * chunk start blockno will be set to the last good block
+                */
+               if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+                       break;
+               ino_cnt += irec_cnt;
+       }
+
+       chunk_start_agbno = cur_agbno + 1;
+
+       for (cur_agbno = agbno + 1; cur_agbno < end_agbno; cur_agbno++)   {
+               /*
+                * if the block has no inodes, it's a bad block so
+                * break out now without incrementing cur_agbno so
+                * chunk start blockno will be set to the block
+                * immediately after the last good block.
+                */
+               if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+                       break;
+               ino_cnt += irec_cnt;
+       }
+
+       chunk_stop_agbno = cur_agbno;
+
+       num_blks = chunk_stop_agbno - chunk_start_agbno;
+
+       if (num_blks < XFS_IALLOC_BLOCKS(mp) || ino_cnt == 0)
+               return(0);
+
+       /*
+        * XXX - later - if the entire range is selected and they're all
+        * good inodes, keep searching in either direction.
+        * until you the range of inodes end, then split into chunks
+        * for now, just take one chunk's worth starting at the lowest
+        * possible point and hopefully we'll pick the rest up later.
+        *
+        * XXX - if we were going to fix up an inode chunk for
+        * any good inodes in the chunk, this is where we would
+        * do it.  For now, keep it simple and lose the rest of
+        * the chunk
+        */
+
+       if (num_blks % XFS_IALLOC_BLOCKS(mp) != 0)  {
+               num_blks = rounddown(num_blks, XFS_IALLOC_BLOCKS(mp));
+               chunk_stop_agbno = chunk_start_agbno + num_blks;
+       }
+
+       /*
+        * ok, we've got a candidate inode chunk.  now we have to
+        * verify that we aren't trying to use blocks that are already
+        * in use.  If so, mark them as multiply claimed since odds
+        * are very low that we found this chunk by stumbling across
+        * user data -- we're probably here as a result of a directory
+        * entry or an iunlinked pointer
+        */
+       for (j = 0, cur_agbno = chunk_start_agbno;
+                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+               case XR_E_MULT:
+               case XR_E_INUSE:
+               case XR_E_INUSE_FS:
+               case XR_E_FS_MAP:
+                       do_warn(
+                           "inode block %d/%d multiply claimed, (state %d)\n",
+                               agno, cur_agbno, state);
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
+                       j = 1;
+                       break;
+               case XR_E_INO:
+                       do_error(
+               "uncertain inode block overlap, agbno = %d, ino = %llu\n",
+                               agbno, ino);
+                       break;
+               default:
+                       break;
+               }
+
+               if (j)
+                       return(0);
+       }
+
+       /*
+        * ok, chunk is good.  put the record into the tree if required,
+        * and fill in the bitmap.  All inodes will be marked as "free"
+        * except for the one that led us to discover the chunk.  That's
+        * ok because we'll override the free setting later if the
+        * contents of the inode indicate it's in use.
+        */
+       start_agino = XFS_OFFBNO_TO_AGINO(mp, chunk_start_agbno, 0);
+       *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+       ASSERT(find_inode_rec(agno, start_agino) == NULL);
+
+       irec_p = set_inode_free_alloc(agno, start_agino);
+       for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+               set_inode_free(irec_p, i);
+
+       ASSERT(start_agino <= agino &&
+                       start_agino + XFS_INODES_PER_CHUNK > agino);
+
+       set_inode_used(irec_p, agino - start_agino);
+
+       for (cur_agbno = chunk_start_agbno;
+                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+               case XR_E_INO:
+                       do_error("uncertain inode block %llu already known\n",
+                               XFS_AGB_TO_FSB(mp, agno, cur_agbno));
+                       break;
+               case XR_E_UNKNOWN:
+               case XR_E_FREE1:
+               case XR_E_FREE:
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+                       break;
+               case XR_E_MULT:
+               case XR_E_INUSE:
+               case XR_E_INUSE_FS:
+               case XR_E_FS_MAP:
+                       do_error(
+                           "inode block %d/%d multiply claimed, (state %d)\n",
+                               agno, cur_agbno, state);
+                       break;
+               default:
+                       do_warn("inode block %d/%d bad state, (state %d)\n",
+                               agno, cur_agbno, state);
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+                       break;
+               }
+       }
+
+       return(ino_cnt);
+}
+
+/*
+ * AG-relative front end to verify_inode_chunk(): takes an (agno, agino)
+ * pair instead of a full inode number.  On success *agino_start receives
+ * the AG-relative number of the first inode of the chunk; otherwise it
+ * is set to NULLAGINO.  The return value is that of verify_inode_chunk().
+ */
+int
+verify_aginode_chunk(xfs_mount_t       *mp,
+                       xfs_agnumber_t  agno,
+                       xfs_agino_t     agino,
+                       xfs_agino_t     *agino_start)
+{
+       xfs_ino_t       chunk_ino;
+       int             found;
+
+       found = verify_inode_chunk(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+                               &chunk_ino);
+
+       *agino_start = found ? XFS_INO_TO_AGINO(mp, chunk_ino) : NULLAGINO;
+
+       return(found);
+}
+
+/*
+ * Like verify_aginode_chunk(), but hands back the chunk's inode record
+ * from the good inode tree, or NULL if the chunk did not verify.
+ */
+ino_tree_node_t *
+verify_aginode_chunk_irec(xfs_mount_t  *mp,
+                       xfs_agnumber_t  agno,
+                       xfs_agino_t     agino)
+{
+       xfs_agino_t     chunk_agino;
+
+       if (!verify_aginode_chunk(mp, agno, agino, &chunk_agino))
+               return(NULL);
+
+       return(find_inode_rec(agno, chunk_agino));
+}
+
+
+
+/*
+ * processes an inode allocation chunk/block, returns 1 on I/O errors,
+ * 0 otherwise
+ *
+ * *bogus is set to 1 if the entire set of inodes is bad.
+ *
+ * The buffer that is read covers XFS_IALLOC_BLOCKS(mp) filesystem
+ * blocks, starting at the block that holds first_irec->ino_startnum.
+ *
+ * If ino_discovery is set, a first pass runs verify_dinode() over every
+ * inode in the buffer.  If not a single inode passes (and none of them
+ * matches sb_rootino, sb_rsumino or sb_rbmino while agno is 0), *bogus
+ * is set and the routine returns 0 without processing anything.
+ * Otherwise the buffer is read again and processing continues.
+ *
+ * The main pass marks each block of the chunk as XR_E_INO in the incore
+ * block map (or XR_E_MULT if the block already has some other state),
+ * calls process_dinode() on every inode and updates the incore inode
+ * record: used/free, is-a-directory and, when ino_discovery is clear,
+ * the parent.  The buffer is written back only if some inode was
+ * reported dirty and no_modify is clear; otherwise it is just released.
+ *
+ * num_inos is not referenced in the body.  check_dups and
+ * extra_attr_check are handed to process_dinode() unchanged.
+ */
+/* ARGSUSED */
+int
+process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
+                       ino_tree_node_t *first_irec, int ino_discovery,
+                       int check_dups, int extra_attr_check, int *bogus)
+{
+       xfs_ino_t               parent;
+       ino_tree_node_t         *ino_rec;
+       xfs_buf_t               *bp;
+       xfs_dinode_t            *dino;
+       int                     icnt;
+       int                     status;
+       int                     is_used;
+       int                     state;
+       int                     done;
+       int                     ino_dirty;
+       int                     irec_offset;
+       int                     ibuf_offset;
+       xfs_agino_t             agino;
+       xfs_agblock_t           agbno;
+       int                     dirty = 0;
+       int                     cleared = 0;    /* only handed to process_dinode() */
+       int                     isa_dir = 0;
+
+       ASSERT(first_irec != NULL);
+       ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
+
+       *bogus = 0;
+       ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+       /*
+        * get all blocks required to read in this chunk (may wind up
+        * having to process more chunks in a multi-chunk per block fs)
+        */
+       agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
+
+       bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+                       XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+       if (!bp) {
+               do_warn("cannot read inode %llu, disk block %lld, cnt %d\n",
+                       XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
+                       XFS_AGB_TO_DADDR(mp, agno, agbno),
+                       (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+               return(1);
+       }
+
+       /*
+        * set up first irec
+        */
+       ino_rec = first_irec;
+       /*
+        * initialize counters
+        *
+        * icnt counts inodes within the buffer, irec_offset counts
+        * inodes within the current inode record and ibuf_offset
+        * counts inodes within the current filesystem block
+        */
+       irec_offset = 0;
+       ibuf_offset = 0;
+       icnt = 0;
+       status = 0;
+       done = 0;
+
+       /*
+        * verify inode chunk if necessary
+        *
+        * in this pass status counts the inodes that are accepted
+        */
+       if (ino_discovery)  {
+               while (!done)  {
+                       /*
+                        * make inode pointer
+                        */
+                       dino = XFS_MAKE_IPTR(mp, bp, icnt);
+                       agino = irec_offset + ino_rec->ino_startnum;
+
+                       /*
+                        * we always think that the root and realtime
+                        * inodes are verified even though we may have
+                        * to reset them later to keep from losing the
+                        * chunk that they're in
+                        *
+                        * (&& binds tighter than ||, so the special
+                        * inode numbers only count when agno == 0)
+                        */
+                       if (verify_dinode(mp, dino, agno, agino) == 0 ||
+                                       agno == 0 &&
+                                       (mp->m_sb.sb_rootino == agino ||
+                                        mp->m_sb.sb_rsumino == agino ||
+                                        mp->m_sb.sb_rbmino == agino))
+                               status++;
+
+                       irec_offset++;
+                       icnt++;
+
+                       if (icnt == XFS_IALLOC_INODES(mp) &&
+                                       irec_offset == XFS_INODES_PER_CHUNK)  {
+                               /*
+                                * done! - finished up irec and block
+                                * simultaneously
+                                */
+                               libxfs_putbuf(bp);
+                               done = 1;
+                               break;
+                       } else if (irec_offset == XFS_INODES_PER_CHUNK)  {
+                               /*
+                                * get new irec (multiple chunks per block fs)
+                                */
+                               ino_rec = next_ino_rec(ino_rec);
+                               ASSERT(ino_rec->ino_startnum == agino + 1);
+                               irec_offset = 0;
+                       }
+               }
+
+               /*
+                * if chunk/block is bad, blow it off.  the inode records
+                * will be deleted by the caller if appropriate.
+                */
+               if (!status)  {
+                       *bogus = 1;
+                       if (!done) /* already free'd */
+                         libxfs_putbuf(bp);
+                       return(0);
+               }
+
+               /*
+                * reset irec and counters
+                */
+               ino_rec = first_irec;
+
+               irec_offset = 0;
+               ibuf_offset = 0;
+               icnt = 0;
+               status = 0;
+               done = 0;
+
+               /*
+                * nathans TODO ... memory leak here?:
+                *
+                * the verify loop above only terminates after it has
+                * called libxfs_putbuf(bp) and set done, so the first
+                * buffer has already been released when we get here.
+                */
+
+               /*
+                * get first block
+                *
+                * note that agino still holds the last inode number
+                * visited by the verify loop, so that is the inode
+                * named in the warning if this read fails
+                */
+               bp = libxfs_readbuf(mp->m_dev,
+                               XFS_AGB_TO_DADDR(mp, agno, agbno),
+                               XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+               if (!bp) {
+                       do_warn("can't read inode %llu, disk block %lld, "
+                               "cnt %d\n", XFS_AGINO_TO_INO(mp, agno, agino),
+                               XFS_AGB_TO_DADDR(mp, agno, agbno),
+                               (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+                       return(1);
+               }
+       }
+
+       /*
+        * mark block as an inode block in the incore bitmap
+        *
+        * unknown and free blocks become XR_E_INO; any state other
+        * than those, XR_E_INO or XR_E_BAD_STATE is treated as a
+        * multiple claim and becomes XR_E_MULT
+        */
+       switch (state = get_agbno_state(mp, agno, agbno))  {
+       case XR_E_INO:  /* already marked */
+               break;
+       case XR_E_UNKNOWN:
+       case XR_E_FREE:
+       case XR_E_FREE1:
+               set_agbno_state(mp, agno, agbno, XR_E_INO);
+               break;
+       case XR_E_BAD_STATE:
+               do_error("bad state in block map %d\n", state);
+               break;
+       default:
+               set_agbno_state(mp, agno, agbno, XR_E_MULT);
+               do_warn("inode block %llu multiply claimed, state was %d\n",
+                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
+               break;
+       }
+
+       while (!done)  {
+               /*
+                * make inode pointer
+                */
+               dino = XFS_MAKE_IPTR(mp, bp, icnt);
+               agino = irec_offset + ino_rec->ino_startnum;
+
+               is_used = 3;    /* sentinel, see the ASSERT below */
+               ino_dirty = 0;
+               parent = 0;
+
+               status = process_dinode(mp, dino, agno, agino,
+                               is_inode_free(ino_rec, irec_offset),
+                               &ino_dirty, &cleared, &is_used,
+                               ino_discovery, check_dups,
+                               extra_attr_check, &isa_dir, &parent);
+
+               ASSERT(is_used != 3);
+               if (ino_dirty)
+                       dirty = 1;
+               /*
+                * XXX - if we want to try and keep
+                * track of whether we need to bang on
+                * the inode maps (instead of just
+                * blindly reconstructing them like
+                * we do now, this is where to start.
+                */
+               if (is_used)  {
+                       if (is_inode_free(ino_rec, irec_offset))  {
+                               if (verbose || no_modify ||
+                                   XFS_AGINO_TO_INO(mp, agno, agino) !=
+                                                       old_orphanage_ino)  {
+                                       do_warn("imap claims in-use inode %llu"
+                                               " is free, ",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               }
+
+                               if (verbose || (!no_modify &&
+                                   XFS_AGINO_TO_INO(mp, agno, agino) !=
+                                               old_orphanage_ino))
+                                       do_warn("correcting imap\n");
+                               else
+                                       do_warn("would correct imap\n");
+                       }
+                       set_inode_used(ino_rec, irec_offset);
+               } else  {
+                       set_inode_free(ino_rec, irec_offset);
+               }
+
+               /*
+                * if we lose the root inode, or it turns into
+                * a non-directory, that allows us to double-check
+                * later whether or not we need to reinitialize it.
+                */
+               if (isa_dir)  {
+                       set_inode_isadir(ino_rec, irec_offset);
+                       /*
+                        * we always set the parent but
+                        * we may as well wait until
+                        * phase 4 (no inode discovery)
+                        * because the parent info will
+                        * be solid then.
+                        */
+                       if (!ino_discovery)  {
+                               ASSERT(parent != 0);
+                               set_inode_parent(ino_rec, irec_offset, parent);
+                               ASSERT(parent ==
+                                       get_inode_parent(ino_rec, irec_offset));
+                       }
+               } else  {
+                       clear_inode_isadir(ino_rec, irec_offset);
+               }
+
+               if (status)  {  /* non-zero result from process_dinode() */
+                       if (mp->m_sb.sb_rootino ==
+                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                               need_root_inode = 1;
+
+                               if (!no_modify)  {
+                                       do_warn("cleared root inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               } else  {
+                                       do_warn("would clear root inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               }
+                       } else if (mp->m_sb.sb_rbmino ==
+                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                               need_rbmino = 1;
+
+                               if (!no_modify)  {
+                                       do_warn("cleared realtime bitmap "
+                                               "inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               } else  {
+                                       do_warn("would clear realtime bitmap "
+                                               "inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               }
+                       } else if (mp->m_sb.sb_rsumino ==
+                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                               need_rsumino = 1;
+
+                               if (!no_modify)  {
+                                       do_warn("cleared realtime summary "
+                                               "inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               } else  {
+                                       do_warn("would clear realtime summary "
+                                               "inode %llu\n",
+                                               XFS_AGINO_TO_INO(mp, agno,
+                                               agino));
+                               }
+                       } else if (!no_modify)  {
+                               do_warn("cleared inode %llu\n",
+                                       XFS_AGINO_TO_INO(mp, agno, agino));
+                       } else  {
+                               do_warn("would have cleared inode %llu\n",
+                                       XFS_AGINO_TO_INO(mp, agno, agino));
+                       }
+               }
+
+               irec_offset++;
+               ibuf_offset++;
+               icnt++;
+
+               if (icnt == XFS_IALLOC_INODES(mp) &&
+                               irec_offset == XFS_INODES_PER_CHUNK)  {
+                       /*
+                        * done! - finished up irec and block simultaneously
+                        *
+                        * the buffer is only written out if something
+                        * was changed and we are allowed to modify
+                        */
+                       if (dirty && !no_modify)
+                               libxfs_writebuf(bp, 0);
+                       else
+                               libxfs_putbuf(bp);
+
+                       done = 1;
+                       break;
+               } else if (ibuf_offset == mp->m_sb.sb_inopblock)  {
+                       /*
+                        * mark block as an inode block in the incore bitmap
+                        * and reset inode buffer offset counter
+                        */
+                       ibuf_offset = 0;
+                       agbno++;
+
+                       switch (state = get_agbno_state(mp, agno, agbno))  {
+                       case XR_E_INO:  /* already marked */
+                               break;
+                       case XR_E_UNKNOWN:
+                       case XR_E_FREE:
+                       case XR_E_FREE1:
+                               set_agbno_state(mp, agno, agbno, XR_E_INO);
+                               break;
+                       case XR_E_BAD_STATE:
+                               do_error( "bad state in block map %d\n",
+                                       state);
+                               break;
+                       default:
+                               set_agbno_state(mp, agno, agbno, XR_E_MULT);
+                               do_warn("inode block %llu multiply claimed, "
+                                       "state was %d\n",
+                                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
+                               break;
+                       }
+
+               } else if (irec_offset == XFS_INODES_PER_CHUNK)  {
+                       /*
+                        * get new irec (multiple chunks per block fs)
+                        */
+                       ino_rec = next_ino_rec(ino_rec);
+                       ASSERT(ino_rec->ino_startnum == agino + 1);
+                       irec_offset = 0;
+               }
+       }
+       return(0);
+}
+
+/*
+ * check all inodes mentioned in the ag's incore inode maps.
+ * the map may be incomplete.  If so, we'll catch the missing
+ * inodes (hopefully) when we traverse the directory tree.
+ * check_dirs is set to 1 if directory inodes should be
+ * processed for internal consistency, parent setting and
+ * discovery of unknown inodes.  this only happens
+ * in phase 3.  check_dups is set to 1 if we're looking for
+ * inodes that reference duplicate blocks so we can trash
+ * the inode right then and there.  this is set only in
+ * phase 4 after we've run through and set the bitmap once.
+ *
+ * NOTE(review): there is no check_dirs parameter; the text above
+ * presumably describes ino_discovery -- confirm.  ino_discovery,
+ * check_dups and extra_attr_check are passed straight through to
+ * process_inode_chunk().
+ *
+ * a non-zero return from process_inode_chunk() (I/O error) is
+ * fatal: abort() is called.  if a chunk comes back bogus, each of
+ * its incore inode records is handed to get_inode_rec() and then
+ * free_inode_rec().
+ */
+void
+process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
+               int ino_discovery, int check_dups, int extra_attr_check)
+{
+       int num_inos, bogus;
+       ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
+
+       first_ino_rec = ino_rec = findfirst_inode_rec(agno);
+       while (ino_rec != NULL)  {
+               /*
+                * paranoia - step through inode records until we step
+                * through a full allocation of inodes.  this could
+                * be an issue in big-block filesystems where a block
+                * can hold more than one inode chunk.  make sure to
+                * grab the record corresponding to the beginning of
+                * the next block before we call the processing routines.
+                *
+                * on exit from the inner loop ino_rec is the last
+                * record of the allocation that starts at first_ino_rec
+                */
+               num_inos = XFS_INODES_PER_CHUNK;
+               while (num_inos < XFS_IALLOC_INODES(mp) && ino_rec != NULL)  {
+                       ASSERT(ino_rec != NULL);
+                       /*
+                        * inodes chunks will always be aligned and sized
+                        * correctly
+                        */
+                       if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
+                               num_inos += XFS_INODES_PER_CHUNK;
+               }
+
+               ASSERT(num_inos == XFS_IALLOC_INODES(mp));
+
+               if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
+                               ino_discovery, check_dups, extra_attr_check, &bogus))  {
+                       /* XXX - i/o error, we've got a problem */
+                       abort();
+               }
+
+               if (!bogus)     /* good chunk: move on to the next record */
+                       first_ino_rec = ino_rec = next_ino_rec(ino_rec);
+               else  {
+                       /*
+                        * inodes pointed to by this record are
+                        * completely bogus, blow the records for
+                        * this chunk out.
+                        * the inode block(s) will get reclaimed
+                        * in phase 4 when the block map is
+                        * reconstructed after inodes claiming
+                        * duplicate blocks are deleted.
+                        *
+                        * the successor is looked up before the
+                        * current record is released.
+                        */
+                       num_inos = 0;
+                       ino_rec = first_ino_rec;
+                       while (num_inos < XFS_IALLOC_INODES(mp) &&
+                                       ino_rec != NULL)  {
+                               prev_ino_rec = ino_rec;
+
+                               if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
+                                       num_inos += XFS_INODES_PER_CHUNK;
+
+                               get_inode_rec(agno, prev_ino_rec);
+                               free_inode_rec(agno, prev_ino_rec);
+                       }
+
+                       first_ino_rec = ino_rec;
+               }
+       }
+}
+
+/*
+ * verify the uncertain inode list for an ag.
+ * Good inodes get moved into the good inode tree.
+ * This routine does not return a value (it is void); it simply
+ * returns early if there are no uncertain inode records to
+ * be processed.  This routine destroys the entire uncertain
+ * inode tree for the ag as a side-effect.
+ */
+void
+check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       ino_tree_node_t         *irec;
+       ino_tree_node_t         *nrec;
+       xfs_agino_t             start;
+       xfs_agino_t             i;
+       xfs_agino_t             agino;
+       int                     got_some;
+
+       nrec = NULL;
+       got_some = 0;
+
+       clear_uncertain_ino_cache(agno);
+
+       if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
+               return;
+
+       /*
+        * the trick here is to find a contiguous range
+        * of inodes, make sure that it doesn't overlap
+        * with a known to exist chunk, and then make
+        * sure it is a number of entire chunks.
+        * we check on-disk once we have an idea of what's
+        * going on just to double-check.
+        *
+        * process the uncertain inode record list and look
+        * on disk to see if the referenced inodes are good
+        *
+        * the warning below is issued as soon as at least one
+        * uncertain record exists, and the same text is issued
+        * again after the loop if any chunk verified (got_some).
+        */
+
+       do_warn("found inodes not in the inode allocation tree\n");
+
+       do {
+               /*
+                * check every confirmed (which in this case means
+                * inode that we really suspect to be an inode) inode
+                *
+                * an inode is skipped if verify_aginum() returns
+                * non-zero for it, if it lies inside the chunk of
+                * the record most recently returned by
+                * find_inode_rec() (nrec), or if find_inode_rec()
+                * finds a record for it.  what is left is handed to
+                * verify_aginode_chunk().  the inner verify_aginum()
+                * call repeats the test made a few lines earlier.
+                * start is passed by address to
+                * verify_aginode_chunk() but never read here.
+                */
+               for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+                       if (!is_inode_confirmed(irec, i))
+                               continue;
+
+                       agino = i + irec->ino_startnum;
+
+                       if (verify_aginum(mp, agno, agino))
+                               continue;
+
+                       if (nrec != NULL && nrec->ino_startnum <= agino &&
+                                       agino < nrec->ino_startnum +
+                                       XFS_INODES_PER_CHUNK)
+                               continue;
+
+                       if ((nrec = find_inode_rec(agno, agino)) == NULL)
+                               if (!verify_aginum(mp, agno, agino))
+                                       if (verify_aginode_chunk(mp, agno,
+                                                       agino, &start))
+                                               got_some = 1;
+               }
+
+               get_uncertain_inode_rec(agno, irec);    /* done with this record */
+               free_inode_rec(agno, irec);
+
+               irec = findfirst_uncertain_inode_rec(agno);
+       } while (irec != NULL);
+
+       if (got_some)
+               do_warn("found inodes not in the inode allocation tree\n");
+
+       return;
+}
+
+/*
+ * verify and process the uncertain inodes for an ag.
+ * this is different from check_ in that we can't just
+ * move the good inodes into the good inode tree and let
+ * process_aginodes() deal with them because this gets called
+ * after process_aginodes() has been run on the ag inode tree.
+ * So we have to process the inodes as well as verify since
+ * we don't want to rerun process_aginodes() on a tree that has
+ * mostly been processed.
+ *
+ * Note that if this routine does process some inodes, it can
+ * add uncertain inodes to any ag which would require that
+ * the routine be called again to process those newly-added
+ * uncertain inodes.
+ *
+ * returns 0 if the ag had no uncertain inode records at all
+ * and 1 otherwise.  note that 1 is returned even when none of
+ * the uncertain inodes led to a chunk being processed; got_some
+ * only controls the warning message, not the return value.
+ *
+ * as a side-effect, this routine tears down the uncertain
+ * inode tree for the ag.
+ */
+int
+process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       ino_tree_node_t         *irec;
+       ino_tree_node_t         *nrec;
+       xfs_agino_t             agino;
+       int                     i;
+       int                     bogus;
+       int                     cnt;
+       int                     got_some;
+
+#ifdef XR_INODE_TRACE
+       fprintf(stderr, "in process_uncertain_aginodes, agno = %d\n", agno);
+#endif
+
+       got_some = 0;
+
+       clear_uncertain_ino_cache(agno);
+
+       if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
+               return(0);
+
+       nrec = NULL;
+
+       do  {
+               /*
+                * check every confirmed inode
+                *
+                * cnt counts the confirmed inodes in this record and
+                * is only used by the ASSERT after the loop
+                */
+               for (cnt = i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+                       if (!is_inode_confirmed(irec, i))
+                               continue;
+                       cnt++;
+                       agino = i + irec->ino_startnum;
+#ifdef XR_INODE_TRACE
+       fprintf(stderr, "ag inode = %d (0x%x)\n", agino, agino);
+#endif
+                       /*
+                        * skip over inodes already processed (in the
+                        * good tree), bad inode numbers, and inode numbers
+                        * pointing to bogus inodes
+                        */
+                       if (verify_aginum(mp, agno, agino))
+                               continue;
+
+                       if (nrec != NULL && nrec->ino_startnum <= agino &&
+                                       agino < nrec->ino_startnum +
+                                       XFS_INODES_PER_CHUNK)
+                               continue;
+
+                       if ((nrec = find_inode_rec(agno, agino)) != NULL)
+                               continue;
+
+                       /*
+                        * verify the chunk.  if good, it will be
+                        * added to the good inode tree.
+                        */
+                       if ((nrec = verify_aginode_chunk_irec(mp,
+                                               agno, agino)) == NULL)
+                               continue;
+
+                       got_some = 1;
+
+                       /*
+                        * process the inode record we just added
+                        * to the good inode tree.  The inode
+                        * processing may add more records to the
+                        * uncertain inode lists.
+                        *
+                        * the chunk is processed with ino_discovery
+                        * on and check_dups/extra_attr_check off.
+                        * the bogus result is not examined here.
+                        */
+                       if (process_inode_chunk(mp, agno, XFS_IALLOC_INODES(mp),
+                                               nrec, 1, 0, 0, &bogus))  {
+                               /* XXX - i/o error, we've got a problem */
+                               abort();
+                       }
+               }
+
+               ASSERT(cnt != 0);
+               /*
+                * now return the uncertain inode record to the free pool
+                * and pull another one off the list for processing
+                */
+               get_uncertain_inode_rec(agno, irec);
+               free_inode_rec(agno, irec);
+
+               irec = findfirst_uncertain_inode_rec(agno);
+       } while (irec != NULL);
+
+       if (got_some)
+               do_warn("found inodes not in the inode allocation tree\n");
+
+       return(1);
+}
diff --git a/repair/dinode.c b/repair/dinode.c
new file mode 100644 (file)
index 0000000..2dcd982
--- /dev/null
@@ -0,0 +1,2914 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dir2.h"
+#include "dinode.h"
+#include "scan.h"
+#include "versions.h"
+#include "attr_repair.h"
+#include "bmap.h"
+
+/*
+ * inode clearing routines
+ */
+
+/*
+ * return the offset into the inode where the attribute fork starts
+ *
+ * the result is the byte offset of di_u within the on-disk inode
+ * plus the size of the data fork as implied by di_format.  mp is
+ * not used.  an unrecognised di_format is reported via do_error()
+ * and followed by abort().
+ */
+/* ARGSUSED */
+int
+calc_attr_offset(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+       xfs_dinode_core_t       *dinoc = &dino->di_core;
+       int                     offset = ((__psint_t) &dino->di_u)
+                                               - (__psint_t)dino;
+
+       /*
+        * don't worry about alignment when calculating offset
+        * because the data fork is already 8-byte aligned
+        *
+        * NOTE(review): the btree case below sizes the fork as
+        * bb_numrecs extent records; confirm that this matches
+        * the layout of the on-disk bmap btree root.
+        */
+       switch (dinoc->di_format)  {
+       case XFS_DINODE_FMT_DEV:
+               offset += sizeof(dev_t);
+               break;
+       case XFS_DINODE_FMT_LOCAL:      /* data stored inline: di_size bytes */
+               offset += INT_GET(dinoc->di_size, ARCH_CONVERT);
+               break;
+       case XFS_DINODE_FMT_UUID:
+               offset += sizeof(uuid_t);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:    /* one record per data extent */
+               offset += INT_GET(dinoc->di_nextents, ARCH_CONVERT) * sizeof(xfs_bmbt_rec_32_t);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               offset += INT_GET(dino->di_u.di_bmbt.bb_numrecs, ARCH_CONVERT) * sizeof(xfs_bmbt_rec_32_t);
+               break;
+       default:
+               do_error("Unknown inode format.\n");
+               abort();
+               break;
+       }
+
+       return(offset);
+}
+
+/*
+ * get rid of the attribute fork of an on-disk inode.
+ *
+ * the caller must only pass inodes that have an attribute fork
+ * (di_forkoff != 0 is asserted).  a message naming ino_num is
+ * written to stderr.  when no_modify is set nothing in the inode
+ * is changed; otherwise di_anextents is zeroed, di_aformat is set
+ * to XFS_DINODE_FMT_EXTENTS, the shortform attribute header is
+ * reset and di_forkoff is zeroed.  mp is not used.
+ *
+ * returns 1 in all cases.
+ */ /* ARGSUSED */
+int
+clear_dinode_attr(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num)
+{
+       xfs_dinode_core_t *dinoc = &dino->di_core;
+
+       ASSERT(dinoc->di_forkoff != 0);
+
+       if (!no_modify)
+               fprintf(stderr, "clearing inode %llu attributes \n", ino_num);
+       else
+               fprintf(stderr, "would have cleared inode %llu attributes\n",
+                       ino_num);
+
+       if (INT_GET(dinoc->di_anextents, ARCH_CONVERT) != 0)  {
+               if (no_modify)  /* report the change without making it */
+                       return(1);
+               INT_ZERO(dinoc->di_anextents, ARCH_CONVERT);
+       }
+
+       if (dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS)  {
+               if (no_modify)
+                       return(1);
+               dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+       }
+
+       /* get rid of the fork by clearing forkoff */
+
+       /* Originally, when the attr repair code was added, the fork was cleared
+        * by turning it into shortform status.  This meant clearing the
+        * hdr.totsize/count fields and also changing aformat to LOCAL
+        * (vs EXTENTS).  Over various fixes, the aformat and forkoff have
+        * been updated to not show an attribute fork at all, however.
+        * It could be possible that resetting totsize/count are not needed,
+        * but just to be safe, leave it in for now. 
+        */
+
+       if (!no_modify) {
+               xfs_attr_shortform_t *asf = (xfs_attr_shortform_t *)
+                               XFS_DFORK_APTR_ARCH(dino, ARCH_CONVERT);
+               INT_SET(asf->hdr.totsize, ARCH_CONVERT,
+                       sizeof(xfs_attr_sf_hdr_t));
+               INT_SET(asf->hdr.count, ARCH_CONVERT, 0);
+               dinoc->di_forkoff = 0;  /* got to do this after asf is set */
+       }
+
+       /*
+        * always returns 1 since the fork gets zapped
+        */
+       return(1);
+}
+
+/*
+ * reset the core of an on-disk inode to a cleared state.
+ *
+ * the fields are examined one after another: di_magic is set to
+ * XFS_DINODE_MAGIC; a bad di_version (or one newer than version 1
+ * when fs_inode_nlink is clear) is replaced by version 2 or 1
+ * depending on fs_inode_nlink; di_mode, di_flags, di_dmevmask,
+ * di_forkoff, di_size, di_nblocks, di_onlink, di_nextents and
+ * di_anextents are zeroed; di_format and di_aformat are set to
+ * XFS_DINODE_FMT_EXTENTS; di_nlink is zeroed for inodes newer
+ * than version 1.
+ *
+ * when no_modify is set nothing is written and the routine
+ * returns 1 at the first field that would have to change.
+ * otherwise it returns 1 if any field was changed and 0 if the
+ * core was already clear.  ino_num is not used.
+ */ /* ARGSUSED */
+int
+clear_dinode_core(xfs_dinode_core_t *dinoc, xfs_ino_t ino_num)
+{
+       int dirty = 0;
+
+       if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+       }
+
+       if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+           (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1))  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               dinoc->di_version = (fs_inode_nlink) ? XFS_DINODE_VERSION_2
+                                               : XFS_DINODE_VERSION_1;
+       }
+
+       if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_mode, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_flags, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_flags, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_dmevmask, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_dmevmask, ARCH_CONVERT);
+       }
+
+       if (dinoc->di_forkoff != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               dinoc->di_forkoff = 0;
+       }
+
+       if (dinoc->di_format != XFS_DINODE_FMT_EXTENTS)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               dinoc->di_format = XFS_DINODE_FMT_EXTENTS;
+       }
+
+       if (dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+       }
+
+       if (INT_GET(dinoc->di_size, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_size, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_nblocks, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_nblocks, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_onlink, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_onlink, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_nextents, ARCH_CONVERT);
+       }
+
+       if (INT_GET(dinoc->di_anextents, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_anextents, ARCH_CONVERT);
+       }
+
+       if (dinoc->di_version > XFS_DINODE_VERSION_1 &&
+                       INT_GET(dinoc->di_nlink, ARCH_CONVERT) != 0)  {
+               dirty = 1;
+
+               if (no_modify)
+                       return(1);
+
+               INT_ZERO(dinoc->di_nlink, ARCH_CONVERT);
+       }
+
+       return(dirty);
+}
+
+/*
+ * Reset the inode's di_next_unlinked field to NULLAGINO.
+ *
+ * The field is only written when no_modify is clear.  Returns 1 if
+ * the field held anything other than NULLAGINO on entry (whether or
+ * not it was actually rewritten), 0 if it was already NULLAGINO.
+ */
+/* ARGSUSED */
+int
+clear_dinode_unlinked(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+       /* already null - nothing to report, nothing to fix */
+       if (dino->di_next_unlinked == NULLAGINO)
+               return(0);
+
+       if (no_modify == 0)
+               dino->di_next_unlinked = NULLAGINO;
+
+       return(1);
+}
+
+/*
+ * Clear an on-disk inode: the core, the unlinked-list pointer and,
+ * if anything needed clearing and we are allowed to modify, the
+ * fork area as well.
+ *
+ * Because this resets the unlinked pointer too, it should not be
+ * called until after the agi unlinked lists are walked in phase 3.
+ *
+ * Returns the sum of the values reported by clear_dinode_core() and
+ * clear_dinode_unlinked(); > zero means the inode needed clearing.
+ */
+int
+clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num)
+{
+       int core_changed;
+       int link_changed;
+       int changed;
+
+       core_changed = clear_dinode_core(&dino->di_core, ino_num);
+       link_changed = clear_dinode_unlinked(mp, dino);
+       changed = core_changed + link_changed;
+
+       /* leave the forks alone if nothing changed or we're read-only */
+       if (changed == 0 || no_modify)
+               return(changed);
+
+       /* wipe the literal (fork) area of the inode */
+       bzero(&dino->di_u, XFS_LITINO(mp));
+
+       return(changed);
+}
+
+
+/*
+ * misc. inode-related utility routines
+ */
+
+/*
+ * Sanity check a filesystem-wide inode number.
+ *
+ * The inode number is split into its AG number and AG-relative inode
+ * number, and the AG block holding the inode is range checked against
+ * the superblock geometry.  The number is rejected if it is 0 or
+ * NULLFSINO, if it does not survive a round trip through
+ * XFS_AGINO_TO_INO() (i.e. has stray bits set), if the AG number is
+ * out of range, if the AG block is 0, or if the AG block lies beyond
+ * the end of its AG.  The last AG may be shorter than sb_agblocks, so
+ * it is additionally checked against the blocks actually left over
+ * in sb_dblocks.
+ *
+ * returns 0 if inode number is valid, 1 if bogus
+ */
+int
+verify_inum(xfs_mount_t                *mp,
+               xfs_ino_t       ino)
+{
+       xfs_agnumber_t  agno;
+       xfs_agino_t     agino;
+       xfs_agblock_t   agbno;
+       xfs_sb_t        *sbp = &mp->m_sb;
+
+       /* range check ag #, ag block.  range-checking offset is pointless */
+
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+       if (ino == 0 || ino == NULLFSINO)
+               return(1);
+
+       if (ino != XFS_AGINO_TO_INO(mp, agno, agino))
+               return(1);
+
+       /*
+        * the last ag is ag number sb_agcount - 1 (not sb_agcount, which
+        * is already rejected as out of range).  do the size arithmetic
+        * in 64 bits so agcount * agblocks cannot wrap.
+        */
+       if (agno >= sbp->sb_agcount ||
+               (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) ||
+               (agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+                               (xfs_drfsbno_t)(sbp->sb_agcount-1) *
+                               sbp->sb_agblocks) ||
+               (agbno == 0))
+               return(1);
+
+       return(0);
+}
+
+/*
+ * Sanity check an (ag number, ag-relative inode number) pair.
+ *
+ * have a separate routine to ensure that we don't accidentally
+ * lose illegally set bits in the agino by turning it into an FSINO
+ * to feed to the above routine
+ *
+ * returns 0 if the pair is valid, 1 if bogus
+ */
+int
+verify_aginum(xfs_mount_t      *mp,
+               xfs_agnumber_t  agno,
+               xfs_agino_t     agino)
+{
+       xfs_agblock_t   agbno;
+       xfs_sb_t        *sbp = &mp->m_sb;
+
+       /* range check ag #, ag block.  range-checking offset is pointless */
+
+       if (agino == 0 || agino == NULLAGINO)
+               return(1);
+
+       /*
+        * agino's can't be too close to NULLAGINO because the min blocksize
+        * is 9 bits and at most 1 bit of that gets used for the inode offset
+        * so if the agino gets shifted by the # of offset bits and compared
+        * to the legal agbno values, a bogus agino will be too large.  there
+        * will be extra bits set at the top that shouldn't be set.
+        */
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+
+       /*
+        * the last ag is ag number sb_agcount - 1 (not sb_agcount, which
+        * is already rejected as out of range) and may be shorter than
+        * sb_agblocks.  do the size arithmetic in 64 bits so
+        * agcount * agblocks cannot wrap.
+        */
+       if (agno >= sbp->sb_agcount ||
+               (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) ||
+               (agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+                               (xfs_drfsbno_t)(sbp->sb_agcount-1) *
+                               sbp->sb_agblocks) ||
+               (agbno == 0))
+               return(1);
+
+       return(0);
+}
+
+/*
+ * Range check a filesystem block number against the superblock
+ * geometry: the ag number must exist and the ag block must lie
+ * within that ag.  The last ag may be shorter than sb_agblocks, so
+ * it is also checked against the blocks left over in sb_dblocks.
+ *
+ * NOTE: the sense of the return value is the opposite of
+ * verify_inum()/verify_aginum().
+ *
+ * return 1 if block number is good, 0 if out of range
+ */
+int
+verify_dfsbno(xfs_mount_t      *mp,
+               xfs_dfsbno_t    fsbno)
+{
+       xfs_agnumber_t  agno;
+       xfs_agblock_t   agbno;
+       xfs_sb_t        *sbp = &mp->m_sb;
+
+       /* range check ag #, ag block.  range-checking offset is pointless */
+
+       agno = XFS_FSB_TO_AGNO(mp, fsbno);
+       agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+       /*
+        * the last ag is ag number sb_agcount - 1 (not sb_agcount, which
+        * is already rejected as out of range).  do the size arithmetic
+        * in 64 bits so agcount * agblocks cannot wrap.
+        */
+       if (agno >= sbp->sb_agcount ||
+               (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) ||
+               (agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+                               (xfs_drfsbno_t)(sbp->sb_agcount-1) *
+                               sbp->sb_agblocks))
+               return(0);
+
+       return(1);
+}
+
+/*
+ * Range check an (ag number, ag block number) pair against the
+ * superblock geometry.  The last ag may be shorter than sb_agblocks,
+ * so it is also checked against the blocks left over in sb_dblocks.
+ *
+ * return 1 if the block is in range, 0 if out of range
+ */
+int
+verify_agbno(xfs_mount_t       *mp,
+               xfs_agnumber_t  agno,
+               xfs_agblock_t   agbno)
+{
+       xfs_sb_t        *sbp = &mp->m_sb;
+
+       /* range check ag #, ag block.  range-checking offset is pointless */
+
+       /*
+        * the last ag is ag number sb_agcount - 1 (not sb_agcount, which
+        * is already rejected as out of range).  do the size arithmetic
+        * in 64 bits so agcount * agblocks cannot wrap.
+        */
+       if (agno >= sbp->sb_agcount ||
+               (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) ||
+               (agno == sbp->sb_agcount - 1 && agbno >= sbp->sb_dblocks -
+                               (xfs_drfsbno_t)(sbp->sb_agcount-1) *
+                               sbp->sb_agblocks))
+               return(0);
+
+       return(1);
+}
+
+/*
+ * Unpack one on-disk bmap extent record into its component values.
+ *
+ * The record is decoded with libxfs_bmbt_get_all().  The extent flag
+ * is reported as 1 only when the filesystem has the extent flag bit
+ * (fs_has_extflgbit) and the decoded state is XFS_EXT_UNWRITTEN;
+ * otherwise it is 0.
+ */
+void
+convert_extent(
+       xfs_bmbt_rec_32_t       *rp,
+       xfs_dfiloff_t           *op,    /* starting offset (blockno in file) */
+       xfs_dfsbno_t            *sp,    /* starting block (fs blockno) */
+       xfs_dfilblks_t          *cp,    /* blockcount */
+       int                     *fp)    /* extent flag */
+{
+       xfs_bmbt_irec_t         decoded;
+
+       /* Just use the extent parsing routine from the kernel */
+       libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, &decoded);
+
+       *fp = (fs_has_extflgbit &&
+                       decoded.br_state == XFS_EXT_UNWRITTEN) ? 1 : 0;
+       *op = decoded.br_startoff;
+       *sp = decoded.br_startblock;
+       *cp = decoded.br_blockcount;
+}
+
+/*
+ * return address of block fblock if it's within the range described
+ * by the extent list.  Otherwise, returns a null address.
+ *
+ *     mp      - mount structure (unused)
+ *     rp      - first of numrecs on-disk extent records
+ *     numrecs - number of records to search
+ *     fblock  - file block offset to look up
+ *
+ * An extent covers fblock when startoff <= fblock < startoff + count;
+ * the result is the extent's start block plus the distance of fblock
+ * from the extent's start offset.  Returns NULLDFSBNO if no record
+ * covers fblock.
+ */
+/* ARGSUSED */
+xfs_dfsbno_t
+get_bmbt_reclist(
+       xfs_mount_t             *mp,
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       xfs_dfiloff_t           fblock)
+{
+       int                     i;
+       xfs_dfilblks_t          cnt;
+       xfs_dfiloff_t           off_bno;
+       xfs_dfsbno_t            start;
+       int                     flag;
+
+       for (i = 0; i < numrecs; i++, rp++) {
+               convert_extent(rp, &off_bno, &start, &cnt, &flag);
+               /* same containment test as getfunc_extlist() */
+               if (off_bno <= fblock && fblock < off_bno + cnt)
+                       return(start + fblock - off_bno);
+       }
+
+       return(NULLDFSBNO);
+}
+
+/*
+ * Walk an array of on-disk bmap extent records, validating each one.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ * if check_dups should be set to 1, that implies that
+ * the primary purpose of this call is to see if the
+ * file overlaps with any duplicate extents (in the
+ * duplicate extent list).
+ *
+ *     rp, numrecs - the extent records to process
+ *     type        - inode type; XR_INO_RTDATA selects realtime checks
+ *     ino         - inode number, used in messages only
+ *     tot         - incremented by the block count of every extent
+ *                   that passes the checks
+ *     blkmapp     - if non-NULL and *blkmapp is non-NULL, each
+ *                   non-realtime-data extent is recorded with
+ *                   blkmap_set_ext()
+ *     first_key   - set to the file offset of the first record
+ *     last_key    - set to the file offset of the last record seen
+ *     check_dups  - 1: only search the duplicate extent lists and
+ *                   leave the block state maps untouched
+ *     whichfork   - XFS_DATA_FORK or attribute fork
+ *
+ * Records must be in ascending, non-overlapping file offset order,
+ * have a non-zero length and lie within the realtime area (for
+ * realtime inodes) or the data area of the filesystem.
+ */
+/* ARGSUSED */
+int
+process_bmbt_reclist_int(
+       xfs_mount_t             *mp,
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       int                     type,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       blkmap_t                **blkmapp,
+       xfs_dfiloff_t           *first_key,
+       xfs_dfiloff_t           *last_key,
+       int                     check_dups,
+       int                     whichfork)
+{
+       xfs_dfsbno_t            b;
+       xfs_drtbno_t            ext;
+       xfs_dfilblks_t          c;              /* count */
+       xfs_dfilblks_t          cp = 0;         /* prev count */
+       xfs_dfsbno_t            s;              /* start */
+       xfs_dfsbno_t            sp = 0;         /* prev start */
+       xfs_dfiloff_t           o = 0;          /* offset */
+       xfs_dfiloff_t           op = 0;         /* prev offset */
+       char                    *ftype;
+       char                    *forkname;
+       int                     i;
+       int                     state;
+       int                     flag;           /* extent flag */
+
+       if (whichfork == XFS_DATA_FORK)
+               forkname = "data";
+       else
+               forkname = "attr";
+
+       if (type == XR_INO_RTDATA)
+               ftype = "real-time";
+       else
+               ftype = "regular";
+
+       for (i = 0; i < numrecs; i++, rp++) {
+               convert_extent(rp, &o, &s, &c, &flag);
+               if (i == 0)
+                       *last_key = *first_key = o;
+               else
+                       *last_key = o;
+               /* this extent must start at or after the end of the last */
+               if (i > 0 && op + cp > o)  {
+                       do_warn(
+"bmap rec out of order, inode %llu entry %d [o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n",
+                               ino, i, o, s, c, i-1, op, sp, cp);
+                       return(1);
+               }
+               op = o;
+               cp = c;
+               sp = s;
+
+               /*
+                * check numeric validity of the extent
+                */
+               if (c == 0)  {
+                       do_warn(
+               "zero length extent (off = %llu, fsbno = %llu) in ino %llu\n",
+                               o, s, ino);
+                       return(1);
+               }
+               if (type == XR_INO_RTDATA) {
+                       if (s >= mp->m_sb.sb_rblocks)  {
+                               do_warn(
+"inode %llu - bad rt extent starting block number %llu, offset %llu\n",
+                                       ino, s, o);
+                               return(1);
+                       }
+                       if (s + c - 1 >= mp->m_sb.sb_rblocks)  {
+                               do_warn(
+"inode %llu - bad rt extent last block number %llu, offset %llu\n",
+                                       ino, s + c - 1, o);
+                               return(1);
+                       }
+                       if (s + c - 1 < s)  {
+                               do_warn(
+"inode %llu - bad rt extent overflows - start %llu, end %llu, offset %llu\n",
+                                       ino, s, s + c - 1, o);
+                               return(1);
+                       }
+               } else  {
+                       if (!verify_dfsbno(mp, s))  {
+                               do_warn(
+"inode %llu - bad extent starting block number %llu, offset %llu\n",
+                                       ino, s, o);
+                               return(1);
+                       }
+                       if (!verify_dfsbno(mp, s + c - 1))  {
+                               do_warn(
+"inode %llu - bad extent last block number %llu, offset %llu\n",
+                                       ino, s + c - 1, o);
+                               return(1);
+                       }
+                       if (s + c - 1 < s)  {
+                               do_warn(
+"inode %llu - bad extent overflows - start %llu, end %llu, offset %llu\n",
+                                       ino, s, s + c - 1, o);
+                               return(1);
+                       }
+                       if (o >= fs_max_file_offset)  {
+                               do_warn(
+"inode %llu - extent offset too large - start %llu, count %llu, offset %llu\n",
+                                       ino, s, c, o);
+                               return(1);
+                       }
+               }
+
+               /*
+                * realtime file data fork
+                */
+               if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK)  {
+                       /*
+                        * XXX - verify that the blocks listed in the record
+                        * are multiples of an extent
+                        */
+                       if (s % mp->m_sb.sb_rextsize != 0 ||
+                                       c % mp->m_sb.sb_rextsize != 0)  {
+                               do_warn(
+"malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n",
+                                       s, c, mp->m_sb.sb_rextsize);
+                               return(1);
+                       }
+
+                       /*
+                        * XXX - set the appropriate number of extents
+                        */
+                       for (b = s; b < s + c; b += mp->m_sb.sb_rextsize)  {
+                               ext = (xfs_drtbno_t) b / mp->m_sb.sb_rextsize;
+
+                               if (check_dups == 1)  {
+                                       if (search_rt_dup_extent(mp, ext))  {
+                                               do_warn(
+"data fork in rt ino %llu claims dup rt extent, off - %llu, start - %llu, count %llu\n",
+                                                       ino, o, s, c);
+                                               return(1);
+                                       }
+                                       continue;
+                               }
+
+                               state = get_rtbno_state(mp, ext);
+
+                               switch (state)  {
+                               case XR_E_FREE:
+/* XXX - turn this back on after we
+       run process_rtbitmap() in phase2
+                                       do_warn(
+                       "%s fork in rt ino %llu claims free rt block %llu\n",
+                                               forkname, ino, ext);
+*/
+                                       /* fall through ... */
+                               case XR_E_UNKNOWN:
+                                       set_rtbno_state(mp, ext, XR_E_INUSE);
+                                       break;
+                               case XR_E_BAD_STATE:
+                                       do_error(
+                               "bad state in rt block map %llu\n", ext);
+                                       abort();
+                                       break;
+                               case XR_E_FS_MAP:
+                               case XR_E_INO:
+                               case XR_E_INUSE_FS:
+                                       do_error(
+       "%s fork in rt inode %llu found metadata block %llu in %s bmap\n",
+                                               forkname, ino, ext, ftype);
+                               case XR_E_INUSE:
+                               case XR_E_MULT:
+                                       set_rtbno_state(mp, ext, XR_E_MULT);
+                                       do_warn(
+                       "%s fork in rt inode %llu claims used rt block %llu\n",
+                                               forkname, ino, ext);
+                                       return(1);
+                               case XR_E_FREE1:
+                               default:
+                                       do_error(
+                               "illegal state %d in %s block map %llu\n",
+                                               state, ftype, b);
+                               }
+                       }
+
+                       /*
+                        * bump up the block counter
+                        */
+                       *tot += c;
+
+                       /*
+                        * skip rest of loop processing since that's
+                        * all for regular file forks and attr forks
+                        */
+                       continue;
+               }
+
+
+               /*
+                * regular file data fork or attribute fork
+                */
+               if (blkmapp && *blkmapp)
+                       blkmap_set_ext(blkmapp, o, s, c);
+               for (b = s; b < s + c; b++)  {
+                       if (check_dups == 1)  {
+                               /*
+                                * if we're just checking the bmap for dups,
+                                * return if we find one, otherwise, continue
+                                * checking each entry without setting the
+                                * block bitmap
+                                */
+                               if (search_dup_extent(mp,
+                                                   XFS_FSB_TO_AGNO(mp, b),
+                                                   XFS_FSB_TO_AGBNO(mp, b)))  {
+                                       do_warn(
+"%s fork in ino %llu claims dup extent, off - %llu, start - %llu, cnt %llu\n",
+                                               forkname, ino, o, s, c);
+                                       return(1);
+                               }
+                               continue;
+                       }
+
+                       /* FIX FOR BUG 653709 -- EKN
+                        * realtime attribute fork, should be valid block number
+                        * in regular data space, not realtime partition.
+                        *
+                        * the extent was only range checked against the
+                        * realtime area above, so make sure the ag number is
+                        * a real one (0 .. sb_agcount - 1) before the block
+                        * is looked up in the per-ag state maps.
+                        */
+                       if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) {
+                               if (XFS_FSB_TO_AGNO(mp, b) >= mp->m_sb.sb_agcount)
+                                       return(1);
+                       }
+
+                       state = get_fsbno_state(mp, b);
+                       switch (state)  {
+                       case XR_E_FREE:
+                       case XR_E_FREE1:
+                               do_warn(
+                               "%s fork in ino %llu claims free block %llu\n",
+                                       forkname, ino, (__uint64_t) b);
+                               /* fall through ... */
+                       case XR_E_UNKNOWN:
+                               set_fsbno_state(mp, b, XR_E_INUSE);
+                               break;
+                       case XR_E_BAD_STATE:
+                               do_error("bad state in block map %llu\n", b);
+                               abort();
+                               break;
+                       case XR_E_FS_MAP:
+                       case XR_E_INO:
+                       case XR_E_INUSE_FS:
+                               do_warn(
+                               "%s fork in inode %llu claims metadata block %llu\n",
+                                       forkname, ino, (__uint64_t) b);
+                               return(1);
+                       case XR_E_INUSE:
+                       case XR_E_MULT:
+                               set_fsbno_state(mp, b, XR_E_MULT);
+                               do_warn(
+                               "%s fork in %s inode %llu claims used block %llu\n",
+                                       forkname, ftype, ino, (__uint64_t) b);
+                               return(1);
+                       default:
+                               do_error("illegal state %d in block map %llu\n",
+                                       state, b);
+                               abort();
+                       }
+               }
+               *tot += c;
+       }
+
+       return(0);
+}
+
+/*
+ * Process an extent record list with duplicate-only checking turned
+ * off (check_dups == 0), so the block bitmap is set as a side-effect.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+int
+process_bmbt_reclist(
+       xfs_mount_t             *mp,
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       int                     type,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       blkmap_t                **blkmapp,
+       xfs_dfiloff_t           *first_key,
+       xfs_dfiloff_t           *last_key,
+       int                     whichfork)
+{
+       int                     clear_inode;
+
+       clear_inode = process_bmbt_reclist_int(mp, rp, numrecs, type, ino,
+                                       tot, blkmapp, first_key, last_key,
+                                       0, whichfork);
+       return(clear_inode);
+}
+
+/*
+ * Scan an extent record list in duplicate-checking mode
+ * (check_dups == 1) with no block map; the first/last key values
+ * are collected into locals and discarded.  Does not set the block
+ * bitmap.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+int
+scan_bmbt_reclist(
+       xfs_mount_t             *mp,
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       int                     type,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       int                     whichfork)
+{
+       xfs_dfiloff_t           lo_key = 0;     /* scratch, not returned */
+       xfs_dfiloff_t           hi_key = 0;     /* scratch, not returned */
+       int                     clear_inode;
+
+       clear_inode = process_bmbt_reclist_int(mp, rp, numrecs, type, ino,
+                                       tot, NULL, &lo_key, &hi_key,
+                                       1, whichfork);
+       return(clear_inode);
+}
+
+/*
+ * these two are meant for routines that read and work with inodes
+ * one at a time where the inodes may be in any order (like walking
+ * the unlinked lists to look for inodes).  the caller is responsible
+ * for writing/releasing the buffer.
+ *
+ * Looks up the inode record covering (agno, agino), reads the buffer
+ * starting at the record's first inode and points *dipp at the
+ * requested inode inside it.  Returns the buffer, or NULL if there
+ * is no inode record or the read fails (*dipp is left untouched in
+ * that case).
+ */
+xfs_buf_t *
+get_agino_buf(xfs_mount_t       *mp,
+               xfs_agnumber_t  agno,
+               xfs_agino_t     agino,
+               xfs_dinode_t    **dipp)
+{
+       ino_tree_node_t *irec;
+       xfs_buf_t       *bp;
+       xfs_agblock_t   chunk_agbno;    /* ag block of the record's 1st inode */
+       int             size;
+
+       irec = find_inode_rec(agno, agino);
+       if (irec == NULL)
+               return(NULL);
+
+       chunk_agbno = XFS_AGINO_TO_AGBNO(mp, irec->ino_startnum);
+
+       size = XFS_FSB_TO_BB(mp, MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+       bp = libxfs_readbuf(mp->m_dev,
+                       XFS_AGB_TO_DADDR(mp, agno, chunk_agbno), size, 0);
+       if (bp == NULL) {
+               do_warn("cannot read inode (%u/%u), disk block %lld\n",
+                       agno, irec->ino_startnum,
+                       XFS_AGB_TO_DADDR(mp, agno, chunk_agbno));
+               return(NULL);
+       }
+
+       /* index of the wanted inode relative to the start of the buffer */
+       *dipp = XFS_MAKE_IPTR(mp, bp,
+                       agino - XFS_OFFBNO_TO_AGINO(mp, chunk_agbno, 0));
+
+       return(bp);
+}
+
+/*
+ * these next routines return the filesystem blockno of the
+ * block containing the block "bno" in the file whose bmap
+ * tree (or extent list) is rooted by "rootblock".
+ *
+ * the next routines are utility routines for the third
+ * routine, get_bmapi().
+ *
+ * This one handles extent-list format forks: the extent records in
+ * the inode fork are searched in order for the first one covering
+ * bno.  Returns NULLDFSBNO if no extent covers bno.
+ */
+/* ARGSUSED */
+xfs_dfsbno_t
+getfunc_extlist(xfs_mount_t            *mp,
+               xfs_ino_t               ino,
+               xfs_dinode_t            *dip,
+               xfs_dfiloff_t           bno,
+               int                     whichfork)
+{
+       xfs_dfiloff_t           ext_off;        /* extent start offset */
+       xfs_dfilblks_t          ext_len;        /* extent block count */
+       xfs_dfsbno_t            ext_start;      /* extent start block */
+       xfs_bmbt_rec_32_t       *extp = (xfs_bmbt_rec_32_t *)
+                                               XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+       xfs_extnum_t            count = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+       int                     idx;
+       int                     unwritten;      /* extent flag, unused */
+
+       for (idx = 0; idx < count; idx++, extp++)  {
+               convert_extent(extp, &ext_off, &ext_start, &ext_len,
+                               &unwritten);
+
+               /* skip extents that don't contain bno */
+               if (bno < ext_off || bno >= ext_off + ext_len)
+                       continue;
+
+               return(bno - ext_off + ext_start);
+       }
+
+       return(NULLDFSBNO);
+}
+
+/*
+ * Map file block "bno" to a filesystem block for a btree format
+ * fork.
+ *
+ * The bmap btree root in the inode fork is searched for the child
+ * whose key range covers bno, then interior blocks are read and
+ * searched the same way until a leaf (level 0) block is reached,
+ * whose extent records are searched for one containing bno.
+ *
+ * All on-disk fields, including those of the root block in the
+ * inode fork, are read through INT_GET(..., ARCH_CONVERT).
+ *
+ * Returns the filesystem block number, or NULLDFSBNO if a block
+ * cannot be read, a block holds more records than the maximum, or
+ * no extent covers bno.  Every buffer read here is released before
+ * returning.
+ */
+xfs_dfsbno_t
+getfunc_btree(xfs_mount_t              *mp,
+               xfs_ino_t               ino,
+               xfs_dinode_t            *dip,
+               xfs_dfiloff_t           bno,
+               int                     whichfork)
+{
+       int                     i;
+       int                     prev_level;
+       int                     flag;
+       int                     found;
+       xfs_bmbt_rec_32_t       *rec;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmbt_key_t          *key;
+       xfs_bmdr_key_t          *rkey;
+       xfs_bmdr_ptr_t          *rp;
+       xfs_dfiloff_t           fbno;
+       xfs_dfsbno_t            fsbno;
+       xfs_dfilblks_t          bcnt;
+       xfs_buf_t               *bp;
+       xfs_dfsbno_t            final_fsbno = NULLDFSBNO;
+       xfs_bmbt_block_t        *block;
+       xfs_bmdr_block_t        *rootblock = (xfs_bmdr_block_t *)
+                       XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+
+       ASSERT(INT_GET(rootblock->bb_level, ARCH_CONVERT) != 0);
+       /*
+        * deal with root block, it's got a slightly different
+        * header structure than interior nodes.  We know that
+        * a btree should have at least 2 levels otherwise it
+        * would be an extent list.
+        */
+       rkey = XFS_BTREE_KEY_ADDR(
+                       XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+                       xfs_bmdr, rootblock, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+                                               mp, whichfork, ARCH_CONVERT),
+                       xfs_bmdr, 1));
+       rp = XFS_BTREE_PTR_ADDR(
+                       XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+                       xfs_bmdr, rootblock, 1,
+                       XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+                                               mp, whichfork, ARCH_CONVERT),
+                       xfs_bmdr, 1));
+       /*
+        * find the key range containing bno.  key i covers offsets
+        * from its own startoff up to the startoff of key i+1; the
+        * last key covers everything from its startoff on.
+        */
+       for (   found = -1, i = 0;
+               i < INT_GET(rootblock->bb_numrecs, ARCH_CONVERT) - 1;
+               i++)  {
+               if (INT_GET(rkey[i].br_startoff, ARCH_CONVERT) <= bno &&
+                   bno < INT_GET(rkey[i+1].br_startoff, ARCH_CONVERT))  {
+                       found = i;
+                       break;
+               }
+       }
+       if (i == INT_GET(rootblock->bb_numrecs, ARCH_CONVERT) - 1 &&
+               bno >= INT_GET(rkey[i].br_startoff, ARCH_CONVERT))
+               found = i;
+
+       ASSERT(found != -1);
+
+       fsbno = INT_GET(rp[found], ARCH_CONVERT);
+
+       ASSERT(verify_dfsbno(mp, fsbno));
+
+       bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+       if (!bp) {
+               do_error("cannot read bmap block %llu\n", fsbno);
+               return(NULLDFSBNO);
+       }
+       block = XFS_BUF_TO_BMBT_BLOCK(bp);
+
+       /*
+        * ok, now traverse any interior btree nodes
+        */
+       prev_level = INT_GET(rootblock->bb_level, ARCH_CONVERT);
+
+       while (INT_GET(block->bb_level, ARCH_CONVERT) > 0)  {
+               /* levels must strictly decrease on the way down */
+               ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) < prev_level);
+
+               prev_level = INT_GET(block->bb_level, ARCH_CONVERT);
+
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >
+                                               mp->m_bmap_dmxr[1]) {
+                       do_warn("# of bmap records in inode %llu exceeds max "
+                               "(%u, max - %u)\n",
+                               ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                               mp->m_bmap_dmxr[1]);
+                       libxfs_putbuf(bp);
+                       return(NULLDFSBNO);
+               }
+               if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+                                               mp->m_bmap_dmnr[1]) {
+                       do_warn("- # of bmap records in inode %llu < than min "
+                               "(%u, min - %u), proceeding ...\n",
+                               ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                               mp->m_bmap_dmnr[1]);
+               }
+               key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
+                       xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+               pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
+                       xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+               for (   found = -1, i = 0;
+                       i < INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1;
+                       i++) {
+                       if (INT_GET(key[i].br_startoff, ARCH_CONVERT) <= bno &&
+                           bno < INT_GET(key[i+1].br_startoff, ARCH_CONVERT)) {
+                               found = i;
+                               break;
+                       }
+               }
+               if (i == INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1 &&
+                       bno >= INT_GET(key[i].br_startoff, ARCH_CONVERT))
+                       found = i;
+
+               ASSERT(found != -1);
+               fsbno = INT_GET(pp[found], ARCH_CONVERT);
+
+               ASSERT(verify_dfsbno(mp, fsbno));
+
+               /*
+                * release current btree block and read in the
+                * next btree block to be traversed
+                */
+               libxfs_putbuf(bp);
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                                       XFS_FSB_TO_BB(mp, 1), 0);
+               if (!bp) {
+                       do_error("cannot read bmap block %llu\n", fsbno);
+                       return(NULLDFSBNO);
+               }
+               block = XFS_BUF_TO_BMBT_BLOCK(bp);
+       }
+
+       /*
+        * current block must be a leaf block
+        */
+       ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0);
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0]) {
+               do_warn("# of bmap records in inode %llu greater than max "
+                       "(%u, max - %u)\n",
+                       ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                       mp->m_bmap_dmxr[0]);
+               libxfs_putbuf(bp);
+               return(NULLDFSBNO);
+       }
+       if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+                                       mp->m_bmap_dmnr[0])
+               do_warn("- # of bmap records in inode %llu < min "
+                       "(%u, min - %u), continuing...\n",
+                       ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                       mp->m_bmap_dmnr[0]);
+
+       /* search the leaf's extent records for one containing bno */
+       rec = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
+               xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
+       for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)  {
+               convert_extent(rec + i, &fbno, &fsbno, &bcnt, &flag);
+
+               if (fbno <= bno && bno < fbno + bcnt)  {
+                       final_fsbno = bno - fbno + fsbno;
+                       break;
+               }
+       }
+       libxfs_putbuf(bp);
+
+       if (final_fsbno == NULLDFSBNO)
+               do_warn("could not map block %llu\n", bno);
+
+       return(final_fsbno);
+}
+
+/*
+ * Map file block "bno" of the given fork to a filesystem block by
+ * dispatching on the fork's on-disk format: extent lists go to
+ * getfunc_extlist(), btrees to getfunc_btree().  Local format and
+ * unknown formats are reported through do_error() and yield
+ * NULLDFSBNO.
+ *
+ * this could be smarter.  maybe we should have an open inode
+ * routine that would get the inode buffer and return back
+ * an inode handle.  I'm betting for the moment that this
+ * is used only by the directory and attribute checking code
+ * and that the avl tree find and buffer cache search are
+ * relatively cheap.  If they're too expensive, we'll just
+ * have to fix this and add an inode handle to the da btree
+ * cursor.
+ *
+ * caller is responsible for checking doubly referenced blocks
+ * and references to holes
+ */
+xfs_dfsbno_t
+get_bmapi(xfs_mount_t *mp, xfs_dinode_t *dino_p,
+               xfs_ino_t ino_num, xfs_dfiloff_t bno, int whichfork)
+{
+       switch (XFS_DFORK_FORMAT_ARCH(dino_p, whichfork, ARCH_CONVERT)) {
+       case XFS_DINODE_FMT_EXTENTS:
+               return(getfunc_extlist(mp, ino_num, dino_p, bno, whichfork));
+       case XFS_DINODE_FMT_BTREE:
+               return(getfunc_btree(mp, ino_num, dino_p, bno, whichfork));
+       case XFS_DINODE_FMT_LOCAL:
+               do_error("get_bmapi() called for local inode %llu\n", ino_num);
+               break;
+       default:
+               /*
+                * shouldn't happen
+                */
+               do_error("bad inode format for inode %llu\n", ino_num);
+               break;
+       }
+
+       /* only reached for formats that have no block mapping */
+       return(NULLDFSBNO);
+}
+
+/*
+ * higher level inode processing stuff starts here:
+ * first, one utility routine for each type of inode
+ */
+
+/*
+ * Check one fork of an inode whose extent map is kept as a bmap btree
+ * rooted in the inode fork.
+ *
+ * The root block is sanity checked (non-zero level, root fits in the
+ * fork), then each child subtree is walked via scan_lbtree() with
+ * scanfunc_bmap.  When check_dups is 0, each root key is compared
+ * against the first key the cursor recorded for that child (and is
+ * rewritten unless no_modify is set), the child keys must be strictly
+ * ascending, and the right sibling pointer left in the cursor at
+ * level 0 must be NULLDFSBNO.
+ *
+ * *tot and *nex are zeroed here and handed to scan_lbtree();
+ * *dirty is set to 1 if a root key was rewritten.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_btinode(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             ino,
+       xfs_dinode_t            *dip,
+       int                     type,
+       int                     *dirty,
+       xfs_drfsbno_t           *tot,
+       __uint64_t              *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork,
+       int                     check_dups)
+{
+       xfs_bmdr_block_t        *dib;
+       xfs_dfiloff_t           last_key;
+       xfs_dfiloff_t           child_key;
+       xfs_ino_t               lino;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmbt_key_t          *pkey;
+       char                    *forkname;
+       int                     i;
+       int                     level;
+       int                     numrecs;
+       int                     fork_space;
+       int                     dfork_size;
+       int                     maxrecs;
+       bmap_cursor_t           cursor;
+
+       dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+       level = INT_GET(dib->bb_level, ARCH_CONVERT);
+       numrecs = INT_GET(dib->bb_numrecs, ARCH_CONVERT);
+       *tot = 0;
+       *nex = 0;
+
+       if (whichfork == XFS_DATA_FORK)
+               forkname = "data";
+       else
+               forkname = "attr";
+
+       if (level == 0) {
+               /*
+                * This should never happen since a btree inode
+                * has to have at least one other block in the
+                * bmap in addition to the root block in the
+                * inode's data fork.
+                *
+                * XXX - if we were going to fix up the inode,
+                * we'd try to treat the fork as an interior
+                * node and see if we could get an accurate
+                * level value from one of the blocks pointed
+                * to by the pointers in the fork.  For now
+                * though, we just bail (and blow out the inode).
+                */
+               do_warn("bad level 0 in inode %llu bmap btree root block\n",
+                       lino);
+               return(1);
+       }
+
+       init_bm_cursor(&cursor, level + 1);
+
+       /*
+        * the root block lives inside the inode fork, so the space its
+        * records need is checked against the size of that fork
+        */
+       fork_space = (whichfork == XFS_DATA_FORK) ?
+                       XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) :
+                       XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT);
+
+       if (XFS_BMDR_SPACE_CALC(numrecs) > fork_space)  {
+               do_warn(
+"indicated size of %s btree root (%d bytes) > space in inode %llu %s fork\n",
+                       forkname, XFS_BMDR_SPACE_CALC(numrecs),
+                       lino, forkname);
+               return(1);
+       }
+
+       dfork_size = XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT);
+       maxrecs = XFS_BTREE_BLOCK_MAXRECS(dfork_size, xfs_bmdr, 0);
+       pp = XFS_BTREE_PTR_ADDR(dfork_size, xfs_bmdr, dib, 1, maxrecs);
+       pkey = XFS_BTREE_KEY_ADDR(dfork_size, xfs_bmdr, dib, 1, maxrecs);
+
+       last_key = NULLDFILOFF;
+
+       for (i = 0; i < numrecs; i++)  {
+               /*
+                * XXX - if we were going to do more to fix up the inode
+                * btree, we'd do it right here.  For now, if there's a
+                * problem, we'll bail out and presumably clear the inode.
+                */
+               if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT)))  {
+                       do_warn("bad bmap btree ptr 0x%llx in ino %llu\n",
+                               INT_GET(pp[i], ARCH_CONVERT), lino);
+                       return(1);
+               }
+
+               if (scan_lbtree((xfs_dfsbno_t)INT_GET(pp[i], ARCH_CONVERT),
+                                   level, scanfunc_bmap, type, whichfork,
+                                   lino, tot, nex, blkmapp, &cursor,
+                                   1, check_dups))
+                       return(1);
+
+               /*
+                * the key checks below are skipped on the duplicate
+                * block checking pass
+                */
+               if (check_dups != 0)
+                       continue;
+
+               /* first key the scan recorded for this child subtree */
+               child_key = cursor.level[level - 1].first_key;
+
+               /*
+                * fix key (offset) mismatches between the keys in root
+                * block records and the first key of each child block.
+                * fixes cases where entries have been shifted between
+                * blocks but the parent hasn't been updated
+                */
+               if (child_key != INT_GET(pkey[i].br_startoff, ARCH_CONVERT))  {
+                       if (!no_modify)  {
+                               do_warn(
+"correcting key in bmbt root (was %llu, now %llu) in inode %llu %s fork\n",
+                                       INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+                                       child_key,
+                                       lino,
+                                       forkname);
+                               *dirty = 1;
+                               INT_SET(pkey[i].br_startoff, ARCH_CONVERT, child_key);
+                       } else  {
+                               do_warn(
+"bad key in bmbt root (is %llu, would reset to %llu) in inode %llu %s fork\n",
+                                       INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+                                       child_key,
+                                       lino,
+                                       forkname);
+                       }
+               }
+               /*
+                * make sure that keys are in ascending order.  blow out
+                * inode if the ordering doesn't hold.  report the key
+                * that broke the ordering (the current child's first
+                * key), not a placeholder.
+                */
+               if (last_key != NULLDFILOFF && last_key >= child_key)  {
+                       do_warn(
+               "out of order bmbt root key %llu in inode %llu %s fork\n",
+                               child_key,
+                               lino,
+                               forkname);
+                       return(1);
+               }
+               last_key = child_key;
+       }
+       /*
+        * Check that the last child block's forward sibling pointer
+        * is NULL.
+        */
+       if (check_dups == 0 &&
+               cursor.level[0].right_fsbno != NULLDFSBNO)  {
+               do_warn(
+       "bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n",
+                       cursor.level[0].right_fsbno);
+               /*
+                * the inode number is 64 bits wide, so it needs %llu;
+                * a narrower conversion misaligns the arguments that
+                * follow it
+                */
+               do_warn(
+               "\tin inode %llu (%s fork) bmap btree block %llu\n",
+                       lino, forkname,
+                       cursor.level[0].fsbno);
+               return(1);
+       }
+
+       return(0);
+}
+
+/*
+ * Check one fork of an inode whose extent records are stored directly
+ * in the fork.  *tot is zeroed and *nex is set to the extent count
+ * read from the inode; the record list is then handed to
+ * scan_bmbt_reclist() when check_dups is set, or to
+ * process_bmbt_reclist() otherwise.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_exinode(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             ino,
+       xfs_dinode_t            *dip,
+       int                     type,
+       int                     *dirty,
+       xfs_drfsbno_t           *tot,
+       __uint64_t              *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork,
+       int                     check_dups)
+{
+       xfs_bmbt_rec_32_t       *recs;
+       xfs_ino_t               inum;
+       xfs_dfiloff_t           first_off;
+       xfs_dfiloff_t           last_off;
+
+       inum = XFS_AGINO_TO_INO(mp, agno, ino);
+       recs = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+       *tot = 0;
+       *nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+
+       /*
+        * XXX - this would be the place to repair a bad extent
+        * record.  For now any problem is just reported back so
+        * that the caller can (presumably) clear the inode.
+        */
+       if (check_dups != 0)  {
+               return(scan_bmbt_reclist(mp, recs, *nex, type, inum, tot,
+                                       whichfork));
+       }
+
+       return(process_bmbt_reclist(mp, recs, *nex, type, inum,
+                               tot, blkmapp, &first_off, &last_off,
+                               whichfork));
+}
+
+/*
+ * Check one fork of an inode stored in local (in-inode) format.
+ *
+ * For the data fork, di_size must not exceed the data fork size.
+ * For the attr fork, the shortform header's totsize must lie between
+ * the size of the shortform header and the attr fork size.
+ *
+ * *tot and *nex are both set to 0.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+/* ARGSUSED */
+int
+process_lclinode(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             ino,
+       xfs_dinode_t            *dip,
+       int                     type,
+       int                     *dirty,
+       xfs_drfsbno_t           *tot,
+       __uint64_t              *nex,
+       blkmap_t                **blkmapp,
+       int                     whichfork,
+       int                     check_dups)
+{
+       xfs_attr_shortform_t    *asf;
+       xfs_dinode_core_t       *dic;
+       xfs_ino_t               lino;
+       int                     totsize;
+
+       *tot = 0;
+       *nex = 0;       /* local inodes have 0 extents */
+
+       dic = &dip->di_core;
+       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+       if (whichfork == XFS_DATA_FORK &&
+           INT_GET(dic->di_size, ARCH_CONVERT) > XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)) {
+               do_warn(
+       "local inode %llu data fork is too large (size = %lld, max = %d)\n",
+                       lino, INT_GET(dic->di_size, ARCH_CONVERT), XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT));
+               return(1);
+       } else if (whichfork == XFS_ATTR_FORK) {
+               asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+               totsize = INT_GET(asf->hdr.totsize, ARCH_CONVERT);
+
+               if (totsize > XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT)) {
+                       do_warn(
+               "local inode %llu attr fork too large (size %d, max = %d)\n",
+                                       lino, totsize,
+                                       XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+                       return(1);
+               }
+               /*
+                * sizeof yields a size_t; cast it so that both the
+                * comparison and the %d conversion see an int
+                */
+               if (totsize < (int)sizeof(xfs_attr_sf_hdr_t)) {
+                       do_warn(
+               "local inode %llu attr too small (size = %d, min size = %d)\n",
+                                       lino, totsize,
+                                       (int)sizeof(xfs_attr_sf_hdr_t));
+                       return(1);
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * Sanity check the data fork extent list of a symlink inode.
+ *
+ * First the inode format is checked against di_size: a symlink whose
+ * size fits in the data fork must be in local format (and then has no
+ * extent list to check), a larger one must not be.  Then each extent
+ * record is decoded and must start exactly where the previous one
+ * ended (the first at offset 0), must be non-empty, and the running
+ * total must not exceed max_symlink_blocks.
+ *
+ * returns 0 if everything is ok, 1 if something is bad
+ */
+int
+process_symlink_extlist(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino)
+{
+       xfs_dfsbno_t            start;          /* start */
+       xfs_dfilblks_t          cnt;            /* count */
+       xfs_dfiloff_t           offset;         /* offset */
+       xfs_dfiloff_t           expected_offset;
+       xfs_bmbt_rec_32_t       *rp;
+       int                     numrecs;
+       int                     i;
+       int                     max_blocks;
+       int                     whichfork = XFS_DATA_FORK;
+       int                     flag;
+
+       if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_SIZE_ARCH(dino, mp, whichfork, ARCH_CONVERT))  {
+               if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL)  {
+                       return(0);
+               } else  {
+                       do_warn(
+"mismatch between format (%d) and size (%lld) in symlink ino %llu\n",
+                               dino->di_core.di_format,
+                               INT_GET(dino->di_core.di_size, ARCH_CONVERT),
+                               lino);
+                       return(1);
+               }
+       } else if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL)  {
+               do_warn(
+"mismatch between format (%d) and size (%lld) in symlink inode %llu\n",
+                               dino->di_core.di_format,
+                               INT_GET(dino->di_core.di_size, ARCH_CONVERT),
+                               lino);
+               return(1);
+       }
+
+       rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dino, whichfork, ARCH_CONVERT);
+       numrecs = XFS_DFORK_NEXTENTS_ARCH(dino, whichfork, ARCH_CONVERT);
+
+       /*
+        * the max # of extents in a symlink inode is equal to the
+        * max # of blocks required to store the symlink
+        */
+       if (numrecs > max_symlink_blocks)  {
+               do_warn(
+               "bad number of extents (%d) in symlink %llu data fork\n",
+                       numrecs, lino);
+               return(1);
+       }
+
+       max_blocks = max_symlink_blocks;
+       expected_offset = 0;
+
+       for (i = 0; i < numrecs; i++)  {
+               /*
+                * decode record i, not record 0 every time; otherwise
+                * any symlink with more than one extent fails the
+                * offset check below
+                */
+               convert_extent(rp + i, &offset, &start, &cnt, &flag);
+
+               if (offset != expected_offset)  {
+                       do_warn(
+               "bad extent #%d offset (%llu) in symlink %llu data fork\n",
+                               i, offset, lino);
+                       return(1);
+               }
+               if (cnt == 0 || cnt > max_blocks)  {
+                       do_warn(
+               "bad extent #%d count (%llu) in symlink %llu data fork\n",
+                               i, cnt, lino);
+                       return(1);
+               }
+
+               /* cnt <= max_blocks here, so this cannot go negative */
+               max_blocks -= cnt;
+               expected_offset += cnt;
+       }
+
+       return(0);
+}
+
+/*
+ * scans the first "length" bytes of "name"; returns 1 as soon as
+ * a \0 byte is seen, 0 if none of those bytes is a \0
+ */
+int
+null_check(char *name, int length)
+{
+       ASSERT(length < MAXPATHLEN);
+
+       while (length-- > 0)  {
+               if (*name++ == '\0')
+                       return(1);
+       }
+
+       return(0);
+}
+
+/*
+ * like usual, returns 0 if everything's ok and 1 if something's
+ * bogus
+ *
+ * The symlink target is gathered into a local MAXPATHLEN-byte
+ * buffer -- copied straight out of the inode when di_size fits in
+ * the data fork, otherwise read block by block using blkmap -- and
+ * is then checked for embedded nulls and for path components of
+ * MAXNAMELEN bytes or more.
+ */
+int
+process_symlink(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino,
+               blkmap_t *blkmap)
+{
+       xfs_dfsbno_t            fsbno;
+       xfs_dinode_core_t       *dinoc = &dino->di_core;
+       xfs_buf_t               *bp = NULL;
+       char                    *symlink, *cptr, *buf_data;
+       int                     i, size, amountdone;
+       char                    data[MAXPATHLEN];
+
+       /*
+        * check size against kernel symlink limits.  we know
+        * size is consistent with inode storage format -- e.g.
+        * the inode is structurally ok so we don't have to check
+        * for that
+        *
+        * rejecting di_size >= MAXPATHLEN here also guarantees that
+        * the target plus its terminating null fits in data[]
+        */
+       if (INT_GET(dinoc->di_size, ARCH_CONVERT) >= MAXPATHLEN)  {
+               do_warn("symlink in inode %llu too long (%lld chars)\n",
+                       lino, INT_GET(dinoc->di_size, ARCH_CONVERT));
+               return(1);
+       }
+
+       /*
+        * have to check symlink component by component.
+        * get symlink contents into data area
+        */
+       symlink = &data[0];
+       if (INT_GET(dinoc->di_size, ARCH_CONVERT)
+                       <= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT))  {
+               /*
+                * local symlink, just copy the symlink out of the
+                * inode into the data area
+                */
+               bcopy((char *)XFS_DFORK_DPTR_ARCH(dino, ARCH_CONVERT),
+                       symlink, INT_GET(dinoc->di_size, ARCH_CONVERT));
+       } else {
+               /*
+                * stored in a meta-data file, have to bmap one block
+                * at a time and copy the symlink into the data area
+                *
+                * i is the file block being fetched, amountdone the
+                * number of bytes copied so far.  bp is only assigned
+                * when fsbno is valid, so after the first pass it may
+                * still hold the previous (already released) buffer;
+                * the explicit fsbno test in the error check below is
+                * what keeps that stale pointer from being used.
+                */
+               i = size = amountdone = 0;
+               cptr = symlink;
+
+               while (amountdone < INT_GET(dinoc->di_size, ARCH_CONVERT)) {
+                       fsbno = blkmap_get(blkmap, i);
+                       if (fsbno != NULLDFSBNO)
+                               bp = libxfs_readbuf(mp->m_dev,
+                                               XFS_FSB_TO_DADDR(mp, fsbno),
+                                               XFS_FSB_TO_BB(mp, 1), 0);
+                       if (!bp || fsbno == NULLDFSBNO) {
+                               do_warn("cannot read inode %llu, file block %d,"
+                                       " disk block %llu\n", lino, i, fsbno);
+                               return(1);
+                       }
+
+                       buf_data = (char *)XFS_BUF_PTR(bp);
+                       size = MIN(INT_GET(dinoc->di_size, ARCH_CONVERT)
+                               - amountdone, (int)XFS_FSB_TO_BB(mp, 1)*BBSIZE);        /* rest of target, at most one fs block */
+                       bcopy(buf_data, cptr, size);
+                       cptr += size;
+                       amountdone += size;
+                       i++;
+                       libxfs_putbuf(bp);
+               }
+       }
+       data[INT_GET(dinoc->di_size, ARCH_CONVERT)] = '\0';     /* terminate for strchr/strlen below */
+
+       /*
+        * check for nulls
+        */
+       if (null_check(symlink, (int) INT_GET(dinoc->di_size, ARCH_CONVERT)))  {
+               do_warn("found illegal null character in symlink inode %llu\n",
+                       lino);
+               return(1);
+       }
+
+       /*
+        * check for any component being too long
+        *
+        * a target shorter than MAXNAMELEN cannot contain a component
+        * of MAXNAMELEN bytes or more, so the walk is skipped for it.
+        * symlink is advanced past each '/' as components are checked;
+        * whatever follows the last '/' is checked with strlen.
+        */
+       if (INT_GET(dinoc->di_size, ARCH_CONVERT) >= MAXNAMELEN)  {
+               cptr = strchr(symlink, '/');
+
+               while (cptr != NULL)  {
+                       if (cptr - symlink >= MAXNAMELEN)  {
+                               do_warn(
+                               "component of symlink in inode %llu too long\n",
+                                       lino);
+                               return(1);
+                       }
+                       symlink = cptr + 1;
+                       cptr = strchr(symlink, '/');
+               }
+
+               if (strlen(symlink) >= MAXNAMELEN)  {
+                       do_warn("component of symlink in inode %llu too long\n",
+                               lino);
+                       return(1);
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * checks the inode types that carry no data of their own
+ * (character and block devices, sockets, fifos).  mountpoint
+ * inodes are always rejected; for the others di_size must be 0.
+ * returns 1 if the inode is bad, 0 otherwise.  any other type
+ * with a non-zero size is treated as an internal error and aborts.
+ */
+/* ARGSUSED */
+int
+process_misc_ino_types(xfs_mount_t     *mp,
+                       xfs_dinode_t    *dino,
+                       xfs_ino_t       lino,
+                       int             type)
+{
+       char            *msg = NULL;
+       long long       isize;
+
+       /*
+        * mountpoint inodes stay disallowed until the kernel
+        * actually lets them be created (which will probably
+        * take a superblock version rev, sigh).
+        */
+       if (type == XR_INO_MOUNTPOINT)  {
+               do_warn("inode %llu has bad inode type (IFMNT)\n", lino);
+               return(1);
+       }
+
+       /*
+        * the size has to be zero as well
+        */
+       isize = INT_GET(dino->di_core.di_size, ARCH_CONVERT);
+       if (isize == 0)
+               return(0);
+
+       /* pick the message for this type; all take (lino, size) */
+       switch (type)  {
+       case XR_INO_CHRDEV:
+               msg = "size of character device inode %llu != 0 "
+                       "(%lld bytes)\n";
+               break;
+       case XR_INO_BLKDEV:
+               msg = "size of block device inode %llu != 0 "
+                       "(%lld bytes)\n";
+               break;
+       case XR_INO_SOCK:
+               msg = "size of socket inode %llu != 0 "
+                       "(%lld bytes)\n";
+               break;
+       case XR_INO_FIFO:
+               msg = "size of fifo inode %llu != 0 "
+                       "(%lld bytes)\n";
+               break;
+       default:
+               do_warn("Internal error - process_misc_ino_types, "
+                       "illegal type %d\n", type);
+               abort();
+       }
+
+       do_warn(msg, lino, isize);
+       return(1);
+}
+
+/*
+ * returns 1 (after warning) if a character device, block device,
+ * socket or fifo inode has a non-zero totblocks; returns 0 when
+ * totblocks is 0 or the type is none of those.
+ */
+int
+process_misc_ino_types_blocks(xfs_drfsbno_t totblocks, xfs_ino_t lino, int type)
+{
+       char    *msg;
+
+       /*
+        * di_nblocks in the inode core cannot be used to enforce
+        * "no data fork blocks" for the misc types, because the
+        * attribute fork blocks (atotblocks) are counted in nblocks
+        * too.  so the data fork total is checked here, and the
+        * nblocks check is left to process_dinode_int, once
+        * atotblocks is known.
+        */
+       if (totblocks == 0)
+               return (0);
+
+       /* pick the message for this type; all take (lino, totblocks) */
+       switch (type)  {
+       case XR_INO_CHRDEV:
+               msg =
+               "size of character device inode %llu != 0 (%llu blocks)\n";
+               break;
+       case XR_INO_BLKDEV:
+               msg =
+               "size of block device inode %llu != 0 (%llu blocks)\n";
+               break;
+       case XR_INO_SOCK:
+               msg =
+               "size of socket inode %llu != 0 (%llu blocks)\n";
+               break;
+       case XR_INO_FIFO:
+               msg =
+               "size of fifo inode %llu != 0 (%llu blocks)\n";
+               break;
+       default:
+               return(0);
+       }
+
+       do_warn(msg, lino, totblocks);
+       return(1);
+}
+
+/*
+ * returns 0 if the inode is ok, 1 if the inode is corrupt
+ * check_dups can be set to 1 *only* when called by the
+ * first pass of the duplicate block checking of phase 4.
+ * *dirty is set > 0 if the dinode has been altered and
+ * needs to be written out.
+ *
+ * for detailed, info, look at process_dinode() comments.
+ */
+/* ARGSUSED */
+int
+process_dinode_int(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino,
+               int was_free,           /* 1 if inode is currently free */
+               int *dirty,             /* out == > 0 if inode is now dirty */
+               int *cleared,           /* out == 1 if inode was cleared */
+               int *used,              /* out == 1 if inode is in use */
+               int verify_mode,        /* 1 == verify but don't modify inode */
+               int uncertain,          /* 1 == inode is uncertain */
+               int ino_discovery,      /* 1 == check dirs for unknown inodes */
+               int check_dups,         /* 1 == check if inode claims
+                                        * duplicate blocks             */
+               int extra_attr_check, /* 1 == do attribute format and value checks */
+               int *isa_dir,           /* out == 1 if inode is a directory */
+               xfs_ino_t *parent)      /* out -- parent if ino is a dir */
+{
+       xfs_drfsbno_t           totblocks = 0;
+       xfs_drfsbno_t           atotblocks = 0;
+       xfs_dinode_core_t       *dinoc;
+       char                    *rstring;
+       int                     type;
+       int                     rtype;
+       int                     do_rt;
+       int                     err;
+       int                     retval = 0;
+       __uint64_t              nextents;
+       __uint64_t              anextents;
+       xfs_ino_t               lino;
+       const int               is_free = 0;
+       const int               is_used = 1;
+       int                     repair = 0;
+       blkmap_t                *ablkmap = NULL;
+       blkmap_t                *dblkmap = NULL;
+       static char             okfmts[] = {
+               0,                              /* free inode */
+               1 << XFS_DINODE_FMT_DEV,        /* FIFO */
+               1 << XFS_DINODE_FMT_DEV,        /* CHR */
+               0,                              /* type 3 unused */
+               (1 << XFS_DINODE_FMT_LOCAL) |
+               (1 << XFS_DINODE_FMT_EXTENTS) |
+               (1 << XFS_DINODE_FMT_BTREE),    /* DIR */
+               0,                              /* type 5 unused */
+               1 << XFS_DINODE_FMT_DEV,        /* BLK */
+               0,                              /* type 7 unused */
+               (1 << XFS_DINODE_FMT_EXTENTS) |
+               (1 << XFS_DINODE_FMT_BTREE),    /* REG */
+               0,                              /* type 9 unused */
+               (1 << XFS_DINODE_FMT_LOCAL) |
+               (1 << XFS_DINODE_FMT_EXTENTS),  /* LNK */
+               0,                              /* type 11 unused */
+               1 << XFS_DINODE_FMT_DEV,        /* SOCK */
+               0,                              /* type 13 unused */
+               1 << XFS_DINODE_FMT_UUID,       /* MNT */
+               0                               /* type 15 unused */
+       };
+
+       retval = 0;
+       totblocks = atotblocks = 0;
+       *dirty = *isa_dir = *cleared = 0;
+       *used = is_used;
+       type = rtype = XR_INO_UNKNOWN;
+       rstring = NULL;
+       do_rt = 0;
+
+       dinoc = &dino->di_core;
+       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+
+       /*
+        * if in verify mode, don't modify the inode.
+        *
+        * if correcting, reset stuff that has known values
+        *
+        * if in uncertain mode, be silent on errors since we're
+        * trying to find out if these are inodes as opposed
+        * to assuming that they are.  Just return the appropriate
+        * return code in that case.
+        */
+
+       if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC)  {
+               retval++;
+               if (!verify_mode)  {
+                       do_warn("bad magic number 0x%x on inode %llu, ", 
+                               INT_GET(dinoc->di_magic, ARCH_CONVERT), lino);
+                       if (!no_modify)  {
+                               do_warn("resetting magic number\n");
+                               *dirty = 1;
+                               INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+                       } else  {
+                               do_warn("would reset magic number\n");
+                       }
+               } else if (!uncertain) {
+                       do_warn("bad magic number 0x%x on inode %llu\n", 
+                               INT_GET(dinoc->di_magic, ARCH_CONVERT), lino);
+               }
+       }
+
+       if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+           (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1))  {
+               retval++;
+               if (!verify_mode)  {
+                       do_warn("bad version number 0x%x on inode %llu, ", 
+                               dinoc->di_version, lino);
+                       if (!no_modify)  {
+                               do_warn("resetting version number\n");
+                               *dirty = 1;
+                               dinoc->di_version = (fs_inode_nlink) ?
+                                       XFS_DINODE_VERSION_2 :
+                                       XFS_DINODE_VERSION_1;
+                       } else  {
+                               do_warn("would reset version number\n");
+                       }
+               } else  if (!uncertain) {
+                       do_warn("bad version number 0x%x on inode %llu\n", 
+                               dinoc->di_version, lino);
+               }
+       }
+
+       /*
+        * blow out of here if the inode size is < 0
+        */
+       if (INT_GET(dinoc->di_size, ARCH_CONVERT) < 0)  {
+               retval++;
+               if (!verify_mode)  {
+                       do_warn("bad (negative) size %lld on inode %llu\n",
+                               INT_GET(dinoc->di_size, ARCH_CONVERT), lino);
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               *cleared = 1;
+                       } else  {
+                               *dirty = 1;
+                               *cleared = 1;
+                       }
+                       *used = is_free;
+               } else if (!uncertain)  {
+                       do_warn("bad (negative) size %lld on inode %llu\n",
+                               INT_GET(dinoc->di_size, ARCH_CONVERT), lino);
+               }
+
+               return(1);
+       }
+
+       /*
+        * was_free value is not meaningful if we're in verify mode
+        */
+       if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 1)  {
+               /*
+                * easy case, inode free -- inode and map agree, clear
+                * it just in case to ensure that format, etc. are
+                * set correctly
+                */
+               if (!no_modify)  {
+                       err =  clear_dinode(mp, dino, lino);
+                       if (err)  {
+                               *dirty = 1;
+                               *cleared = 1;
+                       }
+               }
+               *used = is_free;
+               return(0);
+       } else if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 0)  {
+               /*
+                * the inode looks free but the map says it's in use.
+                * clear the inode just to be safe and mark the inode
+                * free.
+                */
+               do_warn("imap claims a free inode %llu is in use, ", lino);
+
+               if (!no_modify)  {
+                       do_warn("correcting imap and clearing inode\n");
+
+                       err =  clear_dinode(mp, dino, lino);
+                       if (err)  {
+                               retval++;
+                               *dirty = 1;
+                               *cleared = 1;
+                       }
+               } else  {
+                       do_warn("would correct imap and clear inode\n");
+
+                       *dirty = 1;
+                       *cleared = 1;
+               }
+
+               *used = is_free;
+
+               return(retval > 0 ? 1 : 0);
+       }
+
+       /*
+        * because of the lack of any write ordering guarantee, it's
+        * possible that the core got updated but the forks didn't.
+        * so rather than be ambitious (and probably incorrect),
+        * if there's an inconsistency, we get conservative and 
+        * just pitch the file.  blow off checking formats of
+        * free inodes since technically any format is legal
+        * as we reset the inode when we re-use it.
+        */
+       if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0 &&
+               ((((INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12) > 15) ||
+               dinoc->di_format < XFS_DINODE_FMT_DEV ||
+               dinoc->di_format > XFS_DINODE_FMT_UUID ||
+                       (!(okfmts[(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12] &
+                         (1 << dinoc->di_format))))) {
+               /* bad inode format */
+               retval++;
+               if (!uncertain)
+                       do_warn("bad inode format in inode %llu\n", lino);
+               if (!verify_mode)  {
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+               }
+               *cleared = 1;
+               *used = is_free;
+
+               return(retval > 0 ? 1 : 0);
+       }
+
+       if (verify_mode)
+               return(retval > 0 ? 1 : 0);
+
+       /*
+        * clear the next unlinked field if necessary on a good
+        * inode only during phase 4 -- when checking for inodes
+        * referencing duplicate blocks.  then it's safe because
+        * we've done the inode discovery and have found all the inodes
+        * we're going to find.  check_dups is set to 1 only during
+        * phase 4.  Ugly.
+        */
+       if (check_dups && !no_modify)
+               *dirty += clear_dinode_unlinked(mp, dino);
+
+       /* set type and map type info */
+
+       switch (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) {
+       case IFDIR:
+               type = XR_INO_DIR;
+               *isa_dir = 1;
+               break;
+       case IFREG:
+               if (INT_GET(dinoc->di_flags, ARCH_CONVERT) & XFS_DIFLAG_REALTIME)
+                       type = XR_INO_RTDATA;
+               else if (lino == mp->m_sb.sb_rbmino)
+                       type = XR_INO_RTBITMAP;
+               else if (lino == mp->m_sb.sb_rsumino)
+                       type = XR_INO_RTSUM;
+               else
+                       type = XR_INO_DATA;
+               break;
+       case IFLNK:
+               type = XR_INO_SYMLINK;
+               break;
+       case IFCHR:
+               type = XR_INO_CHRDEV;
+               break;
+       case IFBLK:
+               type = XR_INO_BLKDEV;
+               break;
+       case IFSOCK:
+               type = XR_INO_SOCK;
+               break;
+       case IFIFO:
+               type = XR_INO_FIFO;
+               break;
+       case IFMNT:
+               type = XR_INO_MOUNTPOINT;
+               break;
+       default:
+               type = XR_INO_UNKNOWN;
+               do_warn("Unexpected inode type %#o inode %llu\n",
+                       (int) (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT), lino);
+               abort();
+               break;
+       }
+
+       /*
+        * type checks for root, realtime inodes, and quota inodes
+        */
+       if (lino == mp->m_sb.sb_rootino && type != XR_INO_DIR)  {
+               do_warn("bad inode type for root inode %llu, ", lino);
+               type = XR_INO_DIR;
+
+               if (!no_modify)  {
+                       do_warn("resetting to directory\n");
+                       INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT));
+                       INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFDIR);
+               } else  {
+                       do_warn("would reset to directory\n");
+               }
+       } else if (lino == mp->m_sb.sb_rsumino)  {
+               do_rt = 1;
+               rstring = "summary";
+               rtype = XR_INO_RTSUM;
+       } else if (lino == mp->m_sb.sb_rbmino)  {
+               do_rt = 1;
+               rstring = "bitmap";
+               rtype = XR_INO_RTBITMAP;
+       } else if (lino == mp->m_sb.sb_uquotino)  {
+               if (type != XR_INO_DATA)  {
+                       do_warn("user quota inode has bad type 0x%x\n",
+                               INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       mp->m_sb.sb_uquotino = NULLFSINO;
+
+                       return(1);
+               }
+       } else if (lino == mp->m_sb.sb_pquotino)  {
+               if (type != XR_INO_DATA)  {
+                       do_warn("project quota inode has bad type 0x%x\n",
+                               INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       mp->m_sb.sb_pquotino = NULLFSINO;
+
+                       return(1);
+               }
+       }
+
+       if (do_rt && type != rtype)  {
+               type = XR_INO_DATA;
+
+               do_warn("bad inode type for realtime %s inode %llu, ",
+                       rstring, lino);
+
+               if (!no_modify)  {
+                       do_warn("resetting to regular file\n");
+                       INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT));
+                       INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFREG);
+               } else  {
+                       do_warn("would reset to regular file\n");
+               }
+       }
+
+       /*
+        * only realtime inodes should have extsize set
+        */
+       if (type != XR_INO_RTDATA && INT_GET(dinoc->di_extsize, ARCH_CONVERT) != 0)  {
+               do_warn(
+"bad non-zero extent size value %u for non-realtime inode %llu,",
+                       INT_GET(dinoc->di_extsize, ARCH_CONVERT), lino);
+
+               if (!no_modify)  {
+                       do_warn("resetting to zero\n");
+                       INT_ZERO(dinoc->di_extsize, ARCH_CONVERT);
+                       *dirty = 1;
+               } else  {
+                       do_warn("would reset to zero\n");
+               }
+       }
+
+       /*
+        * for realtime inodes, check sizes to see that
+        * they are consistent with the # of realtime blocks.
+        * also, verify that they contain only one extent and
+        * are extent format files.  If anything's wrong, clear
+        * the inode -- we'll recreate it in phase 6.
+        */
+       if (do_rt && INT_GET(dinoc->di_size, ARCH_CONVERT)
+                       != mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize)  {
+               do_warn("bad size %llu for realtime %s inode %llu\n",
+                       INT_GET(dinoc->di_size, ARCH_CONVERT), rstring, lino);
+
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+
+               return(1);
+       }
+
+       if (do_rt && mp->m_sb.sb_rblocks == 0 && INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0)  {
+               do_warn("bad # of extents (%u) for realtime %s inode %llu\n",
+                       INT_GET(dinoc->di_nextents, ARCH_CONVERT), rstring, lino);
+
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+
+               return(1);
+       }
+
+       /*
+        * Setup nextents and anextents for blkmap_alloc calls.
+        */
+       nextents = INT_GET(dinoc->di_nextents, ARCH_CONVERT);
+       if (nextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || nextents > XFS_MAX_INCORE_EXTENTS)
+               nextents = 1;
+       anextents = INT_GET(dinoc->di_anextents, ARCH_CONVERT);
+       if (anextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || anextents > XFS_MAX_INCORE_EXTENTS)
+               anextents = 1;
+
+       /*
+        * general size/consistency checks:
+        *
+        * if the size <= size of the data fork, directories  must be
+        * local inodes unlike regular files which would be extent inodes.
+        * all the other mentioned types have to have a zero size value.
+        *
+        * if the size and format don't match, get out now rather than
+        * risk trying to process a non-existent extents-format or
+        * btree-format data fork.
+        */
+       switch (type)  {
+       case XR_INO_DIR:
+               if (INT_GET(dinoc->di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT)
+                               && dinoc->di_format != XFS_DINODE_FMT_LOCAL)  {
+                       do_warn(
+"mismatch between format (%d) and size (%lld) in directory ino %llu\n",
+                               dinoc->di_format,
+                               INT_GET(dinoc->di_size, ARCH_CONVERT),
+                               lino);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp,
+                                               dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               if (dinoc->di_format != XFS_DINODE_FMT_LOCAL)
+                       dblkmap = blkmap_alloc(nextents);
+               break;
+       case XR_INO_SYMLINK:
+               if (process_symlink_extlist(mp, lino, dino))  {
+                       do_warn("bad data fork in symlink %llu\n", lino);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp,
+                                               dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               if (dinoc->di_format != XFS_DINODE_FMT_LOCAL)
+                       dblkmap = blkmap_alloc(nextents);
+               break;
+       case XR_INO_CHRDEV:     /* fall through to FIFO case ... */
+       case XR_INO_BLKDEV:     /* fall through to FIFO case ... */
+       case XR_INO_SOCK:       /* fall through to FIFO case ... */
+       case XR_INO_MOUNTPOINT: /* fall through to FIFO case ... */
+       case XR_INO_FIFO:
+               if (process_misc_ino_types(mp, dino, lino, type))  {
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               break;
+       case XR_INO_RTDATA:
+               /*
+                * if we have no realtime blocks, any inode claiming
+                * to be a real-time file is bogus
+                */
+               if (mp->m_sb.sb_rblocks == 0)  {
+                       do_warn(
+                       "found inode %llu claiming to be a real-time file\n",
+                               lino);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               break;
+       case XR_INO_RTBITMAP:
+               if (INT_GET(dinoc->di_size, ARCH_CONVERT) != (__int64_t) mp->m_sb.sb_rbmblocks *
+                               mp->m_sb.sb_blocksize)  {
+                       do_warn(
+       "realtime bitmap inode %llu has bad size %lld (should be %lld)\n",
+                               lino, INT_GET(dinoc->di_size, ARCH_CONVERT),
+                               (__int64_t) mp->m_sb.sb_rbmblocks *
+                               mp->m_sb.sb_blocksize);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               dblkmap = blkmap_alloc(nextents);
+               break;
+       case XR_INO_RTSUM:
+               if (INT_GET(dinoc->di_size, ARCH_CONVERT) != mp->m_rsumsize)  {
+                       do_warn(
+       "realtime summary inode %llu has bad size %lld (should be %d)\n",
+                               lino, INT_GET(dinoc->di_size, ARCH_CONVERT), mp->m_rsumsize);
+
+                       if (!no_modify)  {
+                               *dirty += clear_dinode(mp, dino, lino);
+                               ASSERT(*dirty > 0);
+                       }
+
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+
+                       return(1);
+               }
+               dblkmap = blkmap_alloc(nextents);
+               break;
+       default:
+               break;
+       }
+
+       /*
+        * check for illegal values of forkoff
+        */
+       err = 0;
+       if (dinoc->di_forkoff != 0)  {
+               switch (dinoc->di_format)  {
+               case XFS_DINODE_FMT_DEV:
+                       if (dinoc->di_forkoff !=
+                                       (roundup(sizeof(dev_t), 8) >> 3))  {
+                               do_warn(
+               "bad attr fork offset %d in dev inode %llu, should be %d\n",
+                                       (int) dinoc->di_forkoff,
+                                       lino,
+                                       (int) (roundup(sizeof(dev_t), 8) >> 3));
+                               err = 1;
+                       }
+                       break;
+               case XFS_DINODE_FMT_UUID:
+                       if (dinoc->di_forkoff !=
+                                       (roundup(sizeof(uuid_t), 8) >> 3))  {
+                               do_warn(
+               "bad attr fork offset %d in uuid inode %llu, should be %d\n",
+                                       (int) dinoc->di_forkoff,
+                                       lino,
+                                       (int)(roundup(sizeof(uuid_t), 8) >> 3));
+                               err = 1;
+                       }
+                       break;
+               case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
+               case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
+               case XFS_DINODE_FMT_BTREE:
+                       if (dinoc->di_forkoff != mp->m_attroffset >> 3)  {
+                               do_warn(
+               "bad attr fork offset %d in inode %llu, should be %d\n",
+                                       (int) dinoc->di_forkoff,
+                                       lino,
+                                       (int) (mp->m_attroffset >> 3));
+                               err = 1;
+                       }
+                       break;
+               default:
+                       do_error("unexpected inode format %d\n",
+                               (int) dinoc->di_format);
+                       break;
+               }
+       }
+
+       if (err)  {
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+               blkmap_free(dblkmap);
+               return(1);
+       }
+
+       /*
+        * check data fork -- if it's bad, clear the inode
+        */
+       nextents = 0;
+       switch (dinoc->di_format) {
+       case XFS_DINODE_FMT_LOCAL:
+               err = process_lclinode(mp, agno, ino, dino, type,
+                       dirty, &totblocks, &nextents, &dblkmap,
+                       XFS_DATA_FORK, check_dups);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               err = process_exinode(mp, agno, ino, dino, type,
+                       dirty, &totblocks, &nextents, &dblkmap,
+                       XFS_DATA_FORK, check_dups);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               err = process_btinode(mp, agno, ino, dino, type,
+                       dirty, &totblocks, &nextents, &dblkmap,
+                       XFS_DATA_FORK, check_dups);
+               break;
+       case XFS_DINODE_FMT_DEV:        /* fall through */
+       case XFS_DINODE_FMT_UUID:
+               err = 0;
+               break;
+       default:
+               do_error("unknown format %d, ino %llu (mode = %d)\n",
+                               dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT));
+       }
+
+       if (err)  {
+               /*
+                * problem in the data fork, clear out the inode
+                * and get out
+                */
+               do_warn("bad data fork in inode %llu\n", lino);
+
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+               blkmap_free(dblkmap);
+
+               return(1);
+       }
+
+       if (check_dups)  {
+               /*
+                * if check_dups was non-zero, we have to
+                * re-process data fork to set bitmap since the
+                * bitmap wasn't set the first time through
+                */
+               switch (dinoc->di_format) {
+               case XFS_DINODE_FMT_LOCAL:
+                       err = process_lclinode(mp, agno, ino, dino, type,
+                               dirty, &totblocks, &nextents, &dblkmap,
+                               XFS_DATA_FORK, 0);
+                       break;
+               case XFS_DINODE_FMT_EXTENTS:
+                       err = process_exinode(mp, agno, ino, dino, type,
+                               dirty, &totblocks, &nextents, &dblkmap,
+                               XFS_DATA_FORK, 0);
+                       break;
+               case XFS_DINODE_FMT_BTREE:
+                       err = process_btinode(mp, agno, ino, dino, type,
+                               dirty, &totblocks, &nextents, &dblkmap,
+                               XFS_DATA_FORK, 0);
+                       break;
+               case XFS_DINODE_FMT_DEV:        /* fall through */
+               case XFS_DINODE_FMT_UUID:
+                       err = 0;
+                       break;
+               default:
+                       do_error("unknown format %d, ino %llu (mode = %d)\n",
+                                       dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT));
+               }
+
+               if (no_modify && err != 0)  {
+                       *cleared = 1;
+                       *used = is_free;
+                       *isa_dir = 0;
+                       blkmap_free(dblkmap);
+
+                       return(1);
+               }
+
+               ASSERT(err == 0);
+       }
+
+       /*
+        * check attribute fork if necessary.  attributes are
+        * always stored in the regular filesystem.
+        */
+
+       if (!XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT) && dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS) {
+               do_warn("bad attribute format %d in inode %llu, ",
+                       dinoc->di_aformat, lino);
+               if (!no_modify) {
+                       do_warn("resetting value\n");
+                       dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+                       *dirty = 1;
+               } else
+                       do_warn("would reset value\n");
+               anextents = 0;
+       } else if (XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT)) {
+               switch (dinoc->di_aformat) {
+               case XFS_DINODE_FMT_LOCAL:
+                       anextents = 0;
+                       err = process_lclinode(mp, agno, ino, dino,
+                               type, dirty, &atotblocks, &anextents, &ablkmap,
+                               XFS_ATTR_FORK, check_dups);
+                       break;
+               case XFS_DINODE_FMT_EXTENTS:
+                       ablkmap = blkmap_alloc(anextents);
+                       anextents = 0;
+                       err = process_exinode(mp, agno, ino, dino,
+                               type, dirty, &atotblocks, &anextents, &ablkmap,
+                               XFS_ATTR_FORK, check_dups);
+                       break;
+               case XFS_DINODE_FMT_BTREE:
+                       ablkmap = blkmap_alloc(anextents);
+                       anextents = 0;
+                       err = process_btinode(mp, agno, ino, dino,
+                               type, dirty, &atotblocks, &anextents, &ablkmap,
+                               XFS_ATTR_FORK, check_dups);
+                       break;
+               default:
+                       anextents = 0;
+                       do_warn("illegal attribute format %d, ino %llu\n",
+                                       dinoc->di_aformat, lino);
+                       err = 1;
+                       break;
+               }
+
+               if (err)  {
+                       /*
+                        * clear the attribute fork if necessary.  we can't
+                        * clear the inode because we've already put the
+                        * inode space info into the blockmap.
+                        *
+                        * XXX - put the inode onto the "move it" list and
+                        *      log the attribute scrubbing
+                        */
+                       do_warn("bad attribute fork in inode %llu", lino);
+
+                       if (!no_modify)  {
+                               if (delete_attr_ok)  {
+                                       do_warn(", clearing attr fork\n");
+                                       *dirty += clear_dinode_attr(mp,
+                                                       dino, lino);
+                               } else  {
+                                       do_warn("\n");
+                                       *dirty += clear_dinode(mp,
+                                                       dino, lino);
+                               }
+                               ASSERT(*dirty > 0);
+                       } else  {
+                               do_warn(", would clear attr fork\n");
+                       }
+
+                       atotblocks = 0;
+                       anextents = 0;
+
+                       if (delete_attr_ok)  {
+                               if (!no_modify)
+                                       dinoc->di_aformat = XFS_DINODE_FMT_LOCAL;
+                       } else  {
+                               *cleared = 1;
+                               *used = is_free;
+                               *isa_dir = 0;
+                               blkmap_free(dblkmap);
+                               blkmap_free(ablkmap);
+                       }
+                       return(1);
+                       
+               } else if (check_dups)  {
+                       switch (dinoc->di_aformat) {
+                       case XFS_DINODE_FMT_LOCAL:
+                               err = process_lclinode(mp, agno, ino, dino,
+                                       type, dirty, &atotblocks, &anextents,
+                                       &ablkmap, XFS_ATTR_FORK, 0);
+                               break;
+                       case XFS_DINODE_FMT_EXTENTS:
+                               err = process_exinode(mp, agno, ino, dino,
+                                       type, dirty, &atotblocks, &anextents,
+                                       &ablkmap, XFS_ATTR_FORK, 0);
+                               break;
+                       case XFS_DINODE_FMT_BTREE:
+                               err = process_btinode(mp, agno, ino, dino,
+                                       type, dirty, &atotblocks, &anextents,
+                                       &ablkmap, XFS_ATTR_FORK, 0);
+                               break;
+                       default:
+                               do_error("illegal attribute fmt %d, ino %llu\n",
+                                               dinoc->di_aformat, lino);
+                       }
+
+                       if (no_modify && err != 0)  {
+                               *cleared = 1;
+                               *used = is_free;
+                               *isa_dir = 0;
+                               blkmap_free(dblkmap);
+                               blkmap_free(ablkmap);
+
+                               return(1);
+                       }
+
+                       ASSERT(err == 0);
+               }
+
+               /*
+                * do attribute semantic-based consistency checks now
+                */
+
+               /* get this only in phase 3, not in both phase 3 and 4 */
+               if (extra_attr_check) {
+                   if ((err = process_attributes(mp, lino, dino, ablkmap,
+                                   &repair))) {
+                           do_warn("problem with attribute contents in inode %llu\n",lino);
+                           if(!repair) {
+                                   /* clear attributes if not done already */
+                                   if (!no_modify)  {
+                                           *dirty += clear_dinode_attr(
+                                                       mp, dino, lino);
+                                           dinoc->di_aformat =
+                                               XFS_DINODE_FMT_LOCAL;
+                                   } else  {
+                                           do_warn("would clear attr fork\n");
+                                   }
+                                   atotblocks = 0;
+                                   anextents = 0; 
+                           }
+                           else {
+                                   *dirty = 1; /* it's been repaired */
+                            }
+                   }
+               }
+               blkmap_free(ablkmap);
+
+       } else
+               anextents = 0;
+
+       /* 
+       * enforce totblocks is 0 for misc types 
+       */
+       if (process_misc_ino_types_blocks(totblocks, lino, type)) {
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+               blkmap_free(dblkmap);
+
+               return(1);
+       }
+
+       /*
+        * correct space counters if required
+        */
+       if (totblocks + atotblocks != INT_GET(dinoc->di_nblocks, ARCH_CONVERT))  {
+               if (!no_modify)  {
+       do_warn("correcting nblocks for inode %llu, was %llu - counted %llu\n",
+                               lino, INT_GET(dinoc->di_nblocks, ARCH_CONVERT),
+                               totblocks + atotblocks);
+                       *dirty = 1;
+                       INT_SET(dinoc->di_nblocks, ARCH_CONVERT, totblocks + atotblocks);
+               } else  {
+               do_warn(
+       "bad nblocks %llu for inode %llu, would reset to %llu\n",
+                               INT_GET(dinoc->di_nblocks, ARCH_CONVERT), lino,
+                               totblocks + atotblocks);
+               }
+       }
+
+       if (nextents > MAXEXTNUM)  {
+               do_warn("too many data fork extents (%llu) in inode %llu\n",
+                       nextents, lino);
+
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+               blkmap_free(dblkmap);
+
+               return(1);
+       }
+       if (nextents != INT_GET(dinoc->di_nextents, ARCH_CONVERT))  {
+               if (!no_modify)  {
+       do_warn("correcting nextents for inode %llu, was %d - counted %llu\n",
+                               lino, INT_GET(dinoc->di_nextents, ARCH_CONVERT), nextents);
+                       *dirty = 1;
+                       INT_SET(dinoc->di_nextents, ARCH_CONVERT, (xfs_extnum_t) nextents);
+               } else  {
+                       do_warn(
+               "bad nextents %d for inode %llu, would reset to %llu\n",
+                               INT_GET(dinoc->di_nextents, ARCH_CONVERT), lino, nextents);
+               }
+       }
+
+       if (anextents > MAXAEXTNUM)  {
+               do_warn("too many attr fork extents (%llu) in inode %llu\n",
+                       anextents, lino);
+
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+               blkmap_free(dblkmap);
+
+               return(1);
+       }
+       if (anextents != INT_GET(dinoc->di_anextents, ARCH_CONVERT))  {
+               if (!no_modify)  {
+       do_warn("correcting anextents for inode %llu, was %d - counted %llu\n",
+                               lino, INT_GET(dinoc->di_anextents, ARCH_CONVERT), anextents);
+                       *dirty = 1;
+                       INT_SET(dinoc->di_anextents, ARCH_CONVERT, (xfs_aextnum_t) anextents);
+               } else  {
+                       do_warn(
+               "bad anextents %d for inode %llu, would reset to %llu\n",
+                               INT_GET(dinoc->di_anextents, ARCH_CONVERT), lino, anextents);
+               }
+       }
+
+       /*
+        * do any semantic type-based checking here
+        */
+       switch (type)  {
+       case XR_INO_DIR:
+               if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+                       err = process_dir2(mp, lino, dino, ino_discovery,
+                                       dirty, "", parent, dblkmap);
+               else
+                       err = process_dir(mp, lino, dino, ino_discovery,
+                                       dirty, "", parent, dblkmap);
+               if (err)
+                       do_warn(
+                       "problem with directory contents in inode %llu\n",
+                               lino);
+               break;
+       case XR_INO_RTBITMAP:
+               /* process_rtbitmap XXX */
+               err = 0;
+               break;
+       case XR_INO_RTSUM:
+               /* process_rtsummary XXX */
+               err = 0;
+               break;
+       case XR_INO_SYMLINK:
+               if ((err = process_symlink(mp, lino, dino, dblkmap)))
+                       do_warn("problem with symbolic link in inode %llu\n",
+                               lino);
+               break;
+       case XR_INO_DATA:       /* fall through to FIFO case ... */
+       case XR_INO_RTDATA:     /* fall through to FIFO case ... */
+       case XR_INO_CHRDEV:     /* fall through to FIFO case ... */
+       case XR_INO_BLKDEV:     /* fall through to FIFO case ... */
+       case XR_INO_SOCK:       /* fall through to FIFO case ... */
+       case XR_INO_FIFO:
+               err = 0;
+               break;
+       default:
+               printf("Unexpected inode type\n");
+               abort();
+       }
+
+       blkmap_free(dblkmap);
+
+       if (err)  {
+               /*
+                * problem in the inode type-specific semantic
+                * checking, clear out the inode and get out
+                */
+               if (!no_modify)  {
+                       *dirty += clear_dinode(mp, dino, lino);
+                       ASSERT(*dirty > 0);
+               }
+               *cleared = 1;
+               *used = is_free;
+               *isa_dir = 0;
+
+               return(1);
+       }
+
+       /*
+        * check nlinks feature, if it's a version 1 inode,
+        * just leave nlinks alone.  even if it's set wrong,
+        * it'll be reset when read in.
+        */
+       if (dinoc->di_version > XFS_DINODE_VERSION_1 && !fs_inode_nlink)  {
+               /*
+                * do we have a fs/inode version mismatch with a valid
+                * version 2 inode here that has to stay version 2 or
+                * lose links?
+                */
+               if (INT_GET(dinoc->di_nlink, ARCH_CONVERT) > XFS_MAXLINK_1)  {
+                       /*
+                        * yes.  are nlink inodes allowed?
+                        */
+                       if (fs_inode_nlink_allowed)  {
+                               /*
+                                * yes, update status variable which will
+                                * cause sb to be updated later.
+                                */
+                               fs_inode_nlink = 1;
+                               do_warn(
+                               "version 2 inode %llu claims > %u links,",
+                                       lino, XFS_MAXLINK_1);
+                               if (!no_modify)  {
+                                       do_warn(
+                       "updating superblock version number\n");
+                               } else  {
+                                       do_warn(
+                       "would update superblock version number\n");
+                               }
+                       } else  {
+                               /*
+                                * no, have to convert back to onlinks
+                                * even if we lose some links
+                                */
+                               do_warn(
+                       "WARNING:  version 2 inode %llu claims > %u links,",
+                                       lino, XFS_MAXLINK_1);
+                               if (!no_modify)  {
+                                       do_warn(
+       "converting back to version 1,\n\tthis may destroy %d links\n",
+                                               INT_GET(dinoc->di_nlink, ARCH_CONVERT)
+                                               - XFS_MAXLINK_1);
+
+                                       dinoc->di_version =
+                                               XFS_DINODE_VERSION_1;
+                                       INT_SET(dinoc->di_nlink, ARCH_CONVERT, XFS_MAXLINK_1);
+                                       INT_SET(dinoc->di_onlink, ARCH_CONVERT, XFS_MAXLINK_1);
+
+                                       *dirty = 1;
+                               } else  {
+                                       do_warn(
+       "would convert back to version 1,\n\tthis might destroy %d links\n",
+                                               INT_GET(dinoc->di_nlink, ARCH_CONVERT)
+                                               - XFS_MAXLINK_1);
+                               }
+                       }
+               } else  {
+                       /*
+                        * do we have a v2 inode that we could convert back
+                        * to v1 without losing any links?  if we do and
+                        * we have a mismatch between superblock bits and the
+                        * version bit, alter the version bit in this case.
+                        *
+                        * the case where we lost links was handled above.
+                        */
+                       do_warn("found version 2 inode %llu, ", lino);
+                       if (!no_modify)  {
+                               do_warn("converting back to version 1\n");
+
+                               dinoc->di_version =
+                                       XFS_DINODE_VERSION_1;
+                               INT_SET(dinoc->di_onlink, ARCH_CONVERT, INT_GET(dinoc->di_nlink, ARCH_CONVERT));
+
+                               *dirty = 1;
+                       } else  {
+                               do_warn("would convert back to version 1\n");
+                       }
+               }
+       }
+
+       /*
+        * ok, if it's still a version 2 inode, it's going
+        * to stay a version 2 inode.  it should have a zero
+        * onlink field, so clear it.
+        */
+       if (dinoc->di_version > XFS_DINODE_VERSION_1 &&
+                       INT_GET(dinoc->di_onlink, ARCH_CONVERT) > 0 && fs_inode_nlink > 0)  {
+               if (!no_modify)  {
+                       do_warn(
+"clearing obsolete nlink field in version 2 inode %llu, was %d, now 0\n",
+                               lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT));
+                       INT_ZERO(dinoc->di_onlink, ARCH_CONVERT);
+                       *dirty = 1;
+               } else  {
+                       do_warn(
+"would clear obsolete nlink field in version 2 inode %llu, currently %d\n",
+                               lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT));
+                       *dirty = 1;
+               }
+       }
+
+       return(retval > 0 ? 1 : 0);
+}
+
+/*
+ * reports via *used whether the inode is used (1) or free (0).
+ * performs any necessary salvaging actions.
+ * note that we leave the generation count alone
+ * because nothing we could set it to would be
+ * guaranteed to be correct so the best guess for
+ * the correct value is just to leave it alone.
+ *
+ * The trick is detecting empty files.  For those,
+ * the core and the forks should all be in the "empty"
+ * or zero-length state -- a zero or possibly minimum length
+ * (in the case of dirs) extent list -- although inline directories
+ * and symlinks might be handled differently.  So it should be
+ * possible to sanity check them against each other.
+ *
+ * If the forks are an empty extent list though, then forget it.
+ * The file is toast anyway since we can't recover its storage.
+ *
+ * Parameters:
+ *     Ins:
+ *             mp -- mount structure
+ *             dino -- pointer to on-disk inode structure
+ *             agno/ino -- inode numbers
+ *             was_free -- whether the map thinks the inode is free (1 == free)
+ *             ino_discovery -- whether we should examine directory
+ *                             contents to discover new inodes
+ *             check_dups -- whether we should check to see if the
+ *                             inode references duplicate blocks
+ *                             if so, we compare the inode's claimed
+ *                             blocks against the contents of the
+ *                             duplicate extent list but we don't
+ *                             set the bitmap.  If not, we set the
+ *                             bitmap and try and detect multiply
+ *                             claimed blocks using the bitmap.
+ *     Outs:
+ *             dirty -- whether we changed the inode (1 == yes)
+ *             cleared -- whether we cleared the inode (1 == yes).  In
+ *                             no modify mode, if we would have cleared it
+ *             used -- 1 if the inode is used, 0 if free.  In no modify
+ *                     mode, whether the inode should be used or free
+ *             isa_dir -- 1 if the inode is a directory, 0 if not.  In
+ *                     no modify mode, if the inode would be a dir or not.
+ *
+ *     Return value -- 0 if the inode is good, 1 if it is/was corrupt
+ */
+
+int
+process_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino,
+               int was_free,
+               int *dirty,
+               int *cleared,
+               int *used,
+               int ino_discovery,
+               int check_dups,
+               int extra_attr_check,   /* forwarded as-is to process_dinode_int() */
+               int *isa_dir,
+               xfs_ino_t *parent)      /* forwarded as-is to process_dinode_int() */
+{
+       const int verify_mode = 0;      /* the verify_*dinode() wrappers pass 1 */
+       const int uncertain = 0;        /* only verify_uncertain_dinode() passes 1 */
+
+#ifdef XR_INODE_TRACE
+       fprintf(stderr, "processing inode %d/%d\n", agno, ino);
+#endif
+       return(process_dinode_int(mp, dino, agno, ino, was_free, dirty,        /* result returned unchanged */
+                               cleared, used, verify_mode, uncertain,
+                               ino_discovery, check_dups, extra_attr_check,
+                               isa_dir, parent));
+}
+
+/*
+ * a more cursory check, check inode core, *DON'T* check forks
+ * this basically just verifies whether the inode is an inode
+ * and whether or not it has been totally trashed.  returns 0
+ * if the inode passes the cursory sanity check, 1 otherwise.
+ */
+int
+verify_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino)
+{
+       xfs_ino_t parent;               /* scratch: only passed by address, never read here */
+       int cleared = 0;                /* scratch out-param, value discarded */
+       int used = 0;                   /* scratch out-param, value discarded */
+       int dirty = 0;                  /* scratch out-param, value discarded */
+       int isa_dir = 0;                /* scratch out-param, value discarded */
+       const int verify_mode = 1;      /* process_dinode() passes 0 here */
+       const int check_dups = 0;
+       const int ino_discovery = 0;
+       const int uncertain = 0;        /* verify_uncertain_dinode() passes 1 here */
+
+       return(process_dinode_int(mp, dino, agno, ino, 0, &dirty,      /* 0: slot process_dinode() fills with was_free */
+                               &cleared, &used, verify_mode,
+                               uncertain, ino_discovery, check_dups,
+                               0, &isa_dir, &parent));        /* 0: slot process_dinode() fills with extra_attr_check */
+}
+
+/*
+ * like above only for inode on the uncertain list.  it sets
+ * the uncertain flag which makes process_dinode_int quieter.
+ * returns 0 if the inode passes the cursory sanity check, 1 otherwise.
+ */
+int
+verify_uncertain_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino)
+{
+       xfs_ino_t parent;               /* scratch: only passed by address, never read here */
+       int cleared = 0;                /* scratch out-param, value discarded */
+       int used = 0;                   /* scratch out-param, value discarded */
+       int dirty = 0;                  /* scratch out-param, value discarded */
+       int isa_dir = 0;                /* scratch out-param, value discarded */
+       const int verify_mode = 1;      /* same as verify_dinode() */
+       const int check_dups = 0;
+       const int ino_discovery = 0;
+       const int uncertain = 1;        /* the only difference from verify_dinode() */
+
+       return(process_dinode_int(mp, dino, agno, ino, 0, &dirty,      /* 0: slot process_dinode() fills with was_free */
+                               &cleared, &used, verify_mode,
+                               uncertain, ino_discovery, check_dups,
+                               0, &isa_dir, &parent));        /* 0: slot process_dinode() fills with extra_attr_check */
+}
diff --git a/repair/dinode.h b/repair/dinode.h
new file mode 100644 (file)
index 0000000..196068a
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _XR_DINODE_H
+#define _XR_DINODE_H
+
+struct blkmap;         /* forward declaration; this header only uses pointers to it */
+
+int
+verify_agbno(xfs_mount_t       *mp,
+               xfs_agnumber_t  agno,
+               xfs_agblock_t   agbno);
+
+int
+verify_dfsbno(xfs_mount_t      *mp,
+               xfs_dfsbno_t    fsbno);
+
+void                   /* NOTE(review): presumably unpacks *rp into the four pointers below -- confirm */
+convert_extent(
+       xfs_bmbt_rec_32_t       *rp,
+       xfs_dfiloff_t           *op,    /* starting offset (blockno in file) */
+       xfs_dfsbno_t            *sp,    /* starting block (fs blockno) */
+       xfs_dfilblks_t          *cp,    /* blockcount */
+       int                     *fp);   /* extent flag */
+
+int    /* shares mp, rp, numrecs, type, ino, tot and whichfork with scan_bmbt_reclist() */
+process_bmbt_reclist(xfs_mount_t       *mp,
+               xfs_bmbt_rec_32_t       *rp,
+               int                     numrecs,
+               int                     type,
+               xfs_ino_t               ino,
+               xfs_drfsbno_t           *tot,
+               struct blkmap           **blkmapp,
+               __uint64_t              *first_key,
+               __uint64_t              *last_key,
+               int                     whichfork);
+
+int                    /* process_bmbt_reclist()'s parameters minus blkmapp, first_key and last_key */
+scan_bmbt_reclist(
+       xfs_mount_t             *mp,
+       xfs_bmbt_rec_32_t       *rp,
+       int                     numrecs,
+       int                     type,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       int                     whichfork);
+
+int
+verify_inode_chunk(xfs_mount_t         *mp,
+                       xfs_ino_t       ino,
+                       xfs_ino_t       *start_ino);
+
+int    verify_aginode_chunk(xfs_mount_t        *mp,
+                               xfs_agnumber_t  agno,
+                               xfs_agino_t     agino,
+                               xfs_agino_t     *agino_start);
+
+int                    /* a caller does "*dirty += clear_dinode(...)" and then asserts *dirty > 0 */
+clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num);
+
+void
+update_rootino(xfs_mount_t *mp);
+
+int                    /* 0 if the inode is good, 1 if it is/was corrupt */
+process_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino,
+               int was_free,
+               int *dirty,
+               int *tossit,            /* NOTE(review): the definition names this "cleared" */
+               int *used,
+               int check_dirs,         /* NOTE(review): the definition names this "ino_discovery" */
+               int check_dups,
+               int extra_attr_check,
+               int *isa_dir,
+               xfs_ino_t *parent);
+
+int                    /* cursory check: 0 if the inode passes, 1 otherwise */
+verify_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino);
+
+int                    /* like verify_dinode() but sets the "uncertain" flag for a quieter check */
+verify_uncertain_dinode(xfs_mount_t *mp,
+               xfs_dinode_t *dino,
+               xfs_agnumber_t agno,
+               xfs_agino_t ino);
+
+int                    /* process_shortform_dir() treats a nonzero return as an invalid inode number */
+verify_inum(xfs_mount_t                *mp,
+               xfs_ino_t       ino);
+
+int                    /* NOTE(review): presumably the (agno, agino) form of verify_inum() -- confirm */
+verify_aginum(xfs_mount_t      *mp,
+               xfs_agnumber_t  agno,
+               xfs_agino_t     agino);
+
+int
+process_uncertain_aginodes(xfs_mount_t         *mp,
+                               xfs_agnumber_t  agno);
+void
+process_aginodes(xfs_mount_t   *mp,
+               xfs_agnumber_t  agno,
+               int             check_dirs,     /* same flag names as in the process_dinode() prototype; */
+               int             check_dups,     /* NOTE(review): presumably forwarded to it -- confirm */
+               int             extra_attr_check);
+
+void
+check_uncertain_aginodes(xfs_mount_t   *mp,
+                       xfs_agnumber_t  agno);
+
+xfs_buf_t *
+get_agino_buf(xfs_mount_t      *mp,
+               xfs_agnumber_t  agno,
+               xfs_agino_t     agino,
+               xfs_dinode_t    **dipp);
+
+xfs_dfsbno_t
+get_bmapi(xfs_mount_t          *mp,
+               xfs_dinode_t    *dip,
+               xfs_ino_t       ino_num,
+               xfs_dfiloff_t   bno,
+               int             whichfork );
+
+#endif /* _XR_DINODE_H */
diff --git a/repair/dir.c b/repair/dir.c
new file mode 100644 (file)
index 0000000..4854b54
--- /dev/null
@@ -0,0 +1,3033 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "bmap.h"
+
+#if XFS_DIR_LEAF_MAPSIZE >= XFS_ATTR_LEAF_MAPSIZE
+#define XR_DA_LEAF_MAPSIZE     XFS_DIR_LEAF_MAPSIZE
+#else
+#define XR_DA_LEAF_MAPSIZE     XFS_ATTR_LEAF_MAPSIZE
+#endif /* XR_DA_LEAF_MAPSIZE is now the larger of the dir and attr leaf map sizes */
+
+
+
+typedef struct da_hole_map  {  /* NOTE(review): no user of this type is visible in this part of the file */
+       int     lost_holes;     /* presumably holes that did not fit in hentries[] -- confirm */
+       int     num_holes;      /* presumably number of hentries[] slots in use -- confirm */
+       struct {
+               int     base;   /* presumably start offset of the hole -- confirm */
+               int     size;   /* presumably length of the hole -- confirm */
+       } hentries[XR_DA_LEAF_MAPSIZE];        /* sized by the larger of the dir/attr leaf map sizes */
+} da_hole_map_t;
+
+/*
+ * takes a name and length (name need not be null-terminated)
+ * and returns 1 if the name contains a '/' or a \0, returns 0
+ * otherwise.  only the first "length" bytes of name are examined.
+ */
+int
+namecheck(char *name, int length)
+{
+       char *c;                        /* cursor over name[0 .. length-1] */
+       int i;
+
+       ASSERT(length < MAXNAMELEN);    /* caller must keep length below MAXNAMELEN */
+
+       for (c = name, i = 0; i < length; i++, c++)  {     /* body never runs when length <= 0 */
+               if (*c == '/' || *c == '\0')
+                       return(1);      /* first offending byte ends the scan */
+       }
+
+       return(0);
+}
+
+/*
+ * this routine performs inode discovery and tries to fix things
+ * in place.  available redundancy -- inode data size should match
+ * used directory space in inode.  returns number of valid directory
+ * entries.  a non-zero return value means the directory is bogus
+ * and should be blasted.
+ */
+/* ARGSUSED */
+int
+process_shortform_dir(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,    /* out - 1 if dinode buffer dirty? */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry doesn't exist */
+       char            *dirname,       /* directory pathname */
+       int             *repair)        /* out - 1 if dir was fixed up */
+{
+       xfs_dir_shortform_t     *sf;
+       xfs_dir_sf_entry_t      *sf_entry, *next_sfe, *tmp_sfe;
+       xfs_ino_t               lino;
+       int                     max_size;
+       __int64_t               ino_dir_size;
+       int                     num_entries;
+       int                     ino_off;
+       int                     namelen;
+       int                     i;
+       int                     junkit;
+       int                     tmp_len;
+       int                     tmp_elen;
+       int                     bad_sfnamelen;
+       ino_tree_node_t         *irec_p;
+       char                    name[MAXNAMELEN + 1];
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_shortform_dir - inode %llu\n", ino);
+#endif
+
+       sf = &dip->di_u.di_dirsf;
+
+       max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT);
+       num_entries = INT_GET(sf->hdr.count, ARCH_CONVERT);
+       ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+       *repair = 0;
+
+       ASSERT(ino_dir_size <= max_size);
+
+       /*
+        * check for bad entry count
+        */
+       if (num_entries * sizeof(xfs_dir_sf_entry_t) + sizeof(xfs_dir_sf_hdr_t)
+                       > max_size || num_entries == 0)
+               num_entries = 0xFF;
+
+       /*
+        * run through entries, stop at first bad entry, don't need
+        * to check for .. since that's encoded in its own field
+        */
+       sf_entry = next_sfe = &sf->list[0];
+       for (i = 0; i < num_entries && ino_dir_size >
+                               (__psint_t)next_sfe - (__psint_t)sf; i++)  {
+               tmp_sfe = NULL;
+               sf_entry = next_sfe;
+               junkit = 0;
+               bad_sfnamelen = 0;
+               XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+
+               /*
+                * if entry points to self, junk it since only '.' or '..'
+                * should do that and shortform dirs don't contain either
+                * entry.  if inode number is invalid, trash entry.
+                * if entry points to special inodes, trash it.
+                * if inode is unknown but number is valid,
+                * add it to the list of uncertain inodes.  don't
+                * have to worry about an entry pointing to a
+                * deleted lost+found inode because the entry was
+                * deleted at the same time that the inode was cleared.
+                */
+               if (lino == ino)  {
+                       junkit = 1;
+               } else if (verify_inum(mp, lino))  {
+                       /*
+                        * junk the entry, mark lino as NULL since it's bad
+                        */
+                       do_warn("invalid inode number %llu in directory %llu\n",
+                               lino, ino);
+                       lino = NULLFSINO;
+                       junkit = 1;
+               } else if (lino == mp->m_sb.sb_rbmino)  {
+                       do_warn(
+       "entry in shorform dir %llu references realtime bitmap inode %llu\n",
+                               ino, lino);
+                       junkit = 1;
+               } else if (lino == mp->m_sb.sb_rsumino)  {
+                       do_warn(
+       "entry in shorform dir %llu references realtime summary inode %llu\n",
+                               ino, lino);
+                       junkit = 1;
+               } else if (lino == mp->m_sb.sb_uquotino)  {
+                       do_warn(
+       "entry in shorform dir %llu references user quota inode %llu\n",
+                               ino, lino);
+                       junkit = 1;
+               } else if (lino == mp->m_sb.sb_pquotino)  {
+                       do_warn(
+       "entry in shorform dir %llu references proj quota inode %llu\n",
+                               ino, lino);
+                       junkit = 1;
+               } else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+                                       XFS_INO_TO_AGINO(mp, lino))) != NULL)  {
+                       /*
+                        * if inode is marked free and we're in inode
+                        * discovery mode, leave the entry alone for now.
+                        * if the inode turns out to be used, we'll figure
+                        * that out when we scan it.  If the inode really
+                        * is free, we'll hit this code again in phase 4
+                        * after we've finished inode discovery and blow
+                        * out the entry then.
+                        */
+                       ino_off = XFS_INO_TO_AGINO(mp, lino) -
+                               irec_p->ino_startnum;
+                       ASSERT(is_inode_confirmed(irec_p, ino_off));
+
+                       if (!ino_discovery && is_inode_free(irec_p, ino_off))  {
+                               do_warn(
+       "entry references free inode %llu in shortform directory %llu\n",
+                                       lino, ino);
+                               junkit = 1;
+                       }
+               } else if (ino_discovery) {
+                       /*
+                        * put the inode on the uncertain list.  we'll
+                        * pull the inode off the list and check it later.
+                        * if the inode turns out to be bogus, we'll delete
+                        * this entry in phase 6.
+                        */
+                       add_inode_uncertain(mp, lino, 0);
+               } else  {
+                       /*
+                        * blow the entry out.  we know about all
+                        * undiscovered entries now (past inode discovery
+                        * phase) so this is clearly a bogus entry.
+                        */
+                       do_warn(
+       "entry references non-existent inode %llu in shortform dir %llu\n",
+                                       lino, ino);
+                       junkit = 1;
+               }
+
+               namelen = sf_entry->namelen;
+
+               if (namelen == 0)  {
+                       /*
+                        * if we're really lucky, this is
+                        * the last entry in which case we
+                        * can use the dir size to set the
+                        * namelen value.  otherwise, forget
+                        * it because we're not going to be
+                        * able to find the next entry.
+                        */
+                       bad_sfnamelen = 1;
+
+                       if (i == num_entries - 1)  {
+                               namelen = ino_dir_size -
+                                       ((__psint_t) &sf_entry->name[0] -
+                                        (__psint_t) sf);
+                               if (!no_modify)  {
+                                       do_warn(
+               "zero length entry in shortform dir %llu, resetting to %d\n",
+                                               ino, namelen);
+                                       sf_entry->namelen = namelen;
+                               } else  {
+                                       do_warn(
+               "zero length entry in shortform dir %llu, would set to %d\n",
+                                               ino, namelen);
+                               }
+                       } else  {
+                               do_warn(
+       "zero length entry in shortform dir %llu",
+                                       ino);
+                               if (!no_modify)
+                                       do_warn(", junking %d entries\n",
+                                               num_entries - i);
+                               else
+                                       do_warn(", would junk %d entries\n",
+                                               num_entries - i);
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing loop
+                                */
+                               break;
+                       }
+               } else if ((__psint_t) sf_entry - (__psint_t) sf +
+                               + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+                               > ino_dir_size)  {
+                       bad_sfnamelen = 1;
+
+                       if (i == num_entries - 1)  {
+                               namelen = ino_dir_size -
+                                       ((__psint_t) &sf_entry->name[0] -
+                                        (__psint_t) sf);
+                               do_warn(
+       "size of last entry overflows space left in in shortform dir %llu, ",
+                                       ino);
+                               if (!no_modify)  {
+                                       do_warn("resetting to %d\n",
+                                               namelen);
+                                       sf_entry->namelen = namelen;
+                                       *dino_dirty = 1;
+                               } else  {
+                                       do_warn("would reset to %d\n",
+                                               namelen);
+                               }
+                       } else  {
+                               do_warn(
+       "size of entry #%d overflows space left in in shortform dir %llu\n",
+                                       i, ino);
+                               if (!no_modify)  {
+                                       if (i == num_entries - 1)
+                                               do_warn("junking entry #%d\n",
+                                                       i);
+                                       else
+                                               do_warn(
+                                               "junking %d entries\n",
+                                                       num_entries - i);
+                               } else  {
+                                       if (i == num_entries - 1)
+                                               do_warn(
+                                               "would junk entry #%d\n",
+                                                       i);
+                                       else
+                                               do_warn(
+                                               "would junk %d entries\n",
+                                                       num_entries - i);
+                               }
+
+                               break;
+                       }
+               }
+
+               /*
+                * check for illegal chars in name.
+                * no need to check for bad length because
+                * the length value is stored in a byte
+                * so it can't be too big, it can only wrap
+                */
+               if (namecheck((char *)&sf_entry->name[0], namelen))  {
+                       /*
+                        * junk entry
+                        */
+                       do_warn(
+               "entry contains illegal character in shortform dir %llu\n",
+                               ino);
+                       junkit = 1;
+               }
+
+               /*
+                * junk the entry by copying up the rest of the
+                * fork over the current entry and decrementing
+                * the entry count.  if we're in no_modify mode,
+                * just issue the warning instead.  then continue
+                * the loop with the next_sfe pointer set to the
+                * correct place in the fork and other counters
+                * properly set to reflect the deletion if it
+                * happened.
+                */
+               if (junkit)  {
+                       bcopy(sf_entry->name, name, namelen);
+                       name[namelen] = '\0';
+
+                       if (!no_modify)  {
+                               tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+                               INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+                               ino_dir_size -= tmp_elen;
+
+                               tmp_sfe = (xfs_dir_sf_entry_t *)
+                                       ((__psint_t) sf_entry + tmp_elen);
+                               tmp_len = max_size - ((__psint_t) tmp_sfe
+                                                       - (__psint_t) sf);
+
+                               memmove(sf_entry, tmp_sfe, tmp_len);
+
+                               INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+                               num_entries--;
+                               bzero((void *) ((__psint_t) sf_entry + tmp_len),
+                                       tmp_elen);
+
+                               /*
+                                * reset the tmp value to the current
+                                * pointer so we'll process the entry
+                                * we just moved up
+                                */
+                               tmp_sfe = sf_entry;
+
+                               /*
+                                * WARNING:  drop the index i by one
+                                * so it matches the decremented count
+                                * for accurate comparisons later
+                                */
+                               i--;
+
+                               *dino_dirty = 1;
+                               *repair = 1;
+
+                               do_warn(
+                       "junking entry \"%s\" in directory inode %llu\n",
+                                       name, ino);
+                       } else  {
+                               do_warn(
+               "would have junked entry \"%s\" in directory inode %llu\n",
+                                       name, ino);
+                       }
+               }
+
+               /*
+                * go onto next entry unless we've just junked an
+                * entry in which the current entry pointer points
+                * to an unprocessed entry.  have to take into zero-len
+                * entries into account in no modify mode since we
+                * calculate size based on next_sfe.
+                */
+               next_sfe = (tmp_sfe == NULL)
+                       ? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+                               + ((!bad_sfnamelen)
+                                       ? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+                                       : sizeof(xfs_dir_sf_entry_t) - 1
+                                               + namelen))
+                       : tmp_sfe;
+       }
+
+       /* sync up sizes and entry counts */
+
+       if (INT_GET(sf->hdr.count, ARCH_CONVERT) != i)  {
+               if (no_modify)  {
+do_warn("would have corrected entry count in directory %llu from %d to %d\n",
+                       ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+               } else  {
+do_warn("corrected entry count in directory %llu, was %d, now %d\n",
+                       ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+                       INT_SET(sf->hdr.count, ARCH_CONVERT, i);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+
+       if ((__psint_t) next_sfe - (__psint_t) sf != ino_dir_size)  {
+               if (no_modify)  {
+                       do_warn(
+               "would have corrected directory %llu size from %lld to %lld\n",
+                               ino, (__int64_t) ino_dir_size,
+                       (__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+               } else  {
+                       do_warn(
+                       "corrected directory %llu size, was %lld, now %lld\n",
+                               ino, (__int64_t) ino_dir_size,
+                       (__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+
+                       INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)
+                                       ((__psint_t) next_sfe - (__psint_t) sf));
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+       /*
+        * check parent (..) entry
+        */
+       XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, parent, ARCH_CONVERT);
+
+       /*
+        * if parent entry is bogus, null it out.  we'll fix it later .
+        */
+       if (verify_inum(mp, *parent))  {
+               *parent = NULLFSINO;
+
+               do_warn(
+       "bogus .. inode number (%llu) in directory inode %llu,",
+                               *parent, ino);
+               if (!no_modify)  {
+                       do_warn("clearing inode number\n");
+
+                       XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn("would clear inode number\n");
+               }
+       } else if (ino == mp->m_sb.sb_rootino && ino != *parent) {
+               /*
+                * root directories must have .. == .
+                */
+               if (!no_modify)  {
+                       do_warn(
+       "corrected root directory %llu .. entry, was %llu, now %llu\n",
+                               ino, *parent, ino);
+                       *parent = ino;
+                       XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn(
+       "would have corrected root directory %llu .. entry from %llu to %llu\n",
+                               ino, *parent, ino);
+               }
+       } else if (ino == *parent && ino != mp->m_sb.sb_rootino)  {
+               /*
+                * likewise, non-root directories can't have .. pointing
+                * to .
+                */
+               *parent = NULLFSINO;
+               do_warn("bad .. entry in dir ino %llu, points to self,",
+                       ino);
+               if (!no_modify)  {
+                       do_warn(" clearing inode number\n");
+
+                       XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn(" would clear inode number\n");
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * freespace map for directory leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ */
+static da_freemap_t dir_freemap[DA_BMAP_SIZE];
+
+/*
+ * NOTE: this allocator is compiled out (#if 0); the static dir_freemap
+ * array is declared above instead.  as written it mallocs sb_blocksize
+ * bytes but only zeroes the first sb_blocksize / NBBY bytes of the
+ * buffer, and returns NULL when the allocation fails.
+ */
+#if 0
+unsigned char *
+alloc_da_freemap(xfs_mount_t *mp)
+{
+       unsigned char *freemap;
+
+       /* one byte is requested per byte of filesystem block */
+       if ((freemap = malloc(mp->m_sb.sb_blocksize)) == NULL)
+               return(NULL);
+
+       /* only the leading blocksize / NBBY bytes get cleared here */
+       bzero(freemap, mp->m_sb.sb_blocksize/NBBY);
+
+       return(freemap);
+}
+#endif
+
+/*
+ * clear a DA_BMAP_SIZE-element freemap array so that every byte
+ * it tracks starts out marked as free (0 == free)
+ */
+void
+init_da_freemap(da_freemap_t *dir_freemap)
+{
+       const size_t    map_bytes = DA_BMAP_SIZE * sizeof(*dir_freemap);
+
+       bzero(dir_freemap, map_bytes);
+}
+
+/*
+ * marks the byte range [start, stop) as used in a directory freemap.
+ * returns 0 if every byte in the range was free beforehand.  returns 1
+ * (after issuing a warning) if the range is malformed, if it runs past
+ * the end of the block, or if it covers a byte that has already been
+ * claimed; bytes in front of a conflicting byte stay marked as used.
+ *
+ * right now, we just use the static array since only one directory
+ * block will be processed at once even though the interface allows
+ * you to pass in arbitrary da_freemap_t array's.
+ *
+ * Within a char, the lowest bit of the char represents the byte with
+ * the smallest address
+ */
+int
+set_da_freemap(xfs_mount_t *mp, da_freemap_t *map, int start, int stop)
+{
+       const da_freemap_t mask = 0x1;
+       int i;
+
+       if (start < 0 || start > stop)  {
+               /*
+                * start == stop is allowed -- the empty range [x, x)
+                * simply claims no bytes.  a negative start has to be
+                * rejected here because it would otherwise be used
+                * both as a map index and as a shift count below.
+                */
+               do_warn("bad range claimed [%d, %d) in da block\n",
+                       start, stop);
+               return(1);
+       }
+
+       if (stop > mp->m_sb.sb_blocksize)  {
+               do_warn(
+               "byte range end [%d %d) in da block larger than blocksize %d\n",
+                       start, stop, mp->m_sb.sb_blocksize);
+               return(1);
+       }
+
+       for (i = start; i < stop; i++)  {
+               /*
+                * a bit that is already set means some other
+                * structure has claimed this byte of the block
+                */
+               if (map[i / NBBY] & (mask << (i % NBBY)))  {
+                       do_warn("multiply claimed byte %d in da block\n", i);
+                       return(1);
+               }
+               map[i / NBBY] |= (mask << (i % NBBY));
+       }
+
+       return(0);
+}
+
+/*
+ * cross-checks a hole map against the derived freemap.  returns 0
+ * if holemap is consistent with reality (as expressed by the
+ * da_freemap_t), meaning each non-empty hole lies inside the block
+ * and covers only bytes that are still free.  returns 1, after a
+ * warning, for the first hole that is out of range or that overlaps
+ * a used byte.
+ */
+int
+verify_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes,
+                       xfs_ino_t ino, xfs_dablk_t da_bno)
+{
+       const da_freemap_t bit = 0x1;
+       int slot, byte, first, nbytes;
+
+       for (slot = 0; slot < XFS_DIR_LEAF_MAPSIZE; slot++)  {
+               /* zero-sized slots are unused, nothing to check */
+               if (holes->hentries[slot].size == 0)
+                       continue;
+
+               first = holes->hentries[slot].base;
+               nbytes = holes->hentries[slot].size;
+
+               if (first >= mp->m_sb.sb_blocksize ||
+                               first + nbytes > mp->m_sb.sb_blocksize)  {
+                       do_warn(
+       "hole (start %d, len %d) out of range, block %d, dir ino %llu\n",
+                               first, nbytes, da_bno, ino);
+                       return(1);
+               }
+
+               for (byte = first; byte < first + nbytes; byte++)  {
+                       if ((map[byte / NBBY] & (bit << (byte % NBBY))) == 0)
+                               continue;
+                       /*
+                        * bad news -- hole claims a used byte is free
+                        */
+                       do_warn(
+               "hole claims used byte %d, block %d, dir ino %llu\n",
+                               byte, da_bno, ino);
+                       return(1);
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * offer the hole [start, start + length) to the hole map.  the
+ * slot currently holding the smallest hole is overwritten, but only
+ * when the new hole is strictly larger than what that slot holds.
+ */
+static void
+place_da_hole(da_hole_map_t *holes, int start, int length)
+{
+       int j, smallest;
+
+       for (smallest = j = 0; j < XR_DA_LEAF_MAPSIZE; j++)  {
+               if (holes->hentries[j].size < holes->hentries[smallest].size)
+                       smallest = j;
+       }
+
+       if (length > holes->hentries[smallest].size)  {
+               holes->hentries[smallest].base = start;
+               holes->hentries[smallest].size = length;
+       }
+}
+
+/*
+ * scan the freemap of one block (sb_blocksize bytes) and derive the
+ * hole map from it.  every maximal run of free bytes is counted as
+ * one hole and offered to the XR_DA_LEAF_MAPSIZE hentries slots, so
+ * the slots end up describing the largest holes seen.  on return
+ * holes->num_holes is the number of runs found and holes->lost_holes
+ * is how many of them exceed the number of slots (never negative).
+ * the hentries are not reset here; callers are expected to pass in
+ * a hole map in the state they want the scan to start from.
+ */
+void
+process_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes)
+{
+       int i, in_hole, start, length, num_holes;
+       const da_freemap_t mask = 0x1;
+
+       num_holes = in_hole = start = length = 0;
+
+       for (i = 0; i < mp->m_sb.sb_blocksize; i++)  {
+               if ((map[i / NBBY] & (mask << (i % NBBY))) == 0)  {
+                       /*
+                        * byte is free (unused) -- remember where the
+                        * run began if this is the first free byte
+                        */
+                       if (in_hole == 0)  {
+                               in_hole = 1;
+                               start = i;
+                       }
+                       continue;
+               }
+
+               /*
+                * byte is used -- only interesting when it
+                * terminates a run of free bytes
+                */
+               if (in_hole == 0)
+                       continue;
+
+               in_hole = 0;
+               length = i - start;
+
+               /*
+                * if the hole disappears, throw it away
+                */
+               if (length <= 0)
+                       continue;
+
+               num_holes++;
+               place_da_hole(holes, start, length);
+       }
+
+       /*
+        * see if we have a big hole at the end -- the loop above only
+        * closes a hole when it runs into a used byte, so a run that
+        * extends to the end of the block is still open here
+        */
+       if (in_hole == 1)  {
+               length = i - start;
+
+               if (length > 0)  {
+                       num_holes++;
+                       place_da_hole(holes, start, length);
+               }
+       }
+
+       holes->lost_holes = MAX(num_holes - XR_DA_LEAF_MAPSIZE, 0);
+       holes->num_holes = num_holes;
+
+       return;
+}
+
+/*
+ * compares the derived hole map (holemap) with the one read from
+ * the block (block_hmap).  returns 1 if the hole info doesn't match,
+ * 0 if it does.  in verbose mode every mismatch is reported before
+ * returning; otherwise the first mismatch ends the comparison.
+ */
+/* ARGSUSED */
+int
+compare_da_freemaps(xfs_mount_t *mp, da_hole_map_t *holemap,
+                       da_hole_map_t *block_hmap, int entries,
+                       xfs_ino_t ino, xfs_dablk_t da_bno)
+{
+       int derived, seen, matched;
+       int mismatch = 0;
+       /*
+        * we chop holemap->lost_holes down to being two-valued
+        * value (1 or 0) for the test  because the filesystem
+        * value is two-valued
+        */
+       int derived_lost = (holemap->lost_holes > 0) ? 1 : 0;
+
+       if (derived_lost != block_hmap->lost_holes)  {
+               if (!verbose)
+                       return(1);
+               do_warn(
+               "- derived hole value %d, saw %d, block %d, dir ino %llu\n",
+                       holemap->lost_holes, block_hmap->lost_holes,
+                       da_bno, ino);
+               mismatch = 1;
+       }
+
+       /*
+        * each derived hole has to show up, with identical base and
+        * size, somewhere among the holes recorded in the block
+        */
+       for (derived = 0; derived < entries; derived++)  {
+               matched = 0;
+               for (seen = 0; seen < entries && !matched; seen++)  {
+                       if (holemap->hentries[derived].base ==
+                                       block_hmap->hentries[seen].base
+                                       && holemap->hentries[derived].size ==
+                                       block_hmap->hentries[seen].size)
+                               matched = 1;
+               }
+               if (matched)
+                       continue;
+
+               if (!verbose)
+                       return(1);
+               do_warn(
+"- derived hole (base %d, size %d) in block %d, dir inode %llu not found\n",
+                       holemap->hentries[derived].base,
+                       holemap->hentries[derived].size,
+                       da_bno, ino);
+               mismatch = 1;
+       }
+
+       return(mismatch);
+}
+
+/*
+ * NOTE: compiled out (#if 0).  ad-hoc exerciser for the freemap code:
+ * it marks a handful of byte ranges as used in the static dir_freemap
+ * and then derives a hole map from the result.  nothing is checked or
+ * printed, so the outcome has to be inspected by hand.
+ */
+#if 0
+void
+test(xfs_mount_t *mp)
+{
+       int i = 0;
+       da_hole_map_t   holemap;
+
+       /* start from an all-free map and an empty hole map */
+       init_da_freemap(dir_freemap);
+       bzero(&holemap, sizeof(da_hole_map_t));
+
+       /* used ranges are half-open: [start, stop) */
+       set_da_freemap(mp, dir_freemap, 0, 50);
+       set_da_freemap(mp, dir_freemap, 100, 126);
+       set_da_freemap(mp, dir_freemap, 126, 129);
+       set_da_freemap(mp, dir_freemap, 130, 131);
+       set_da_freemap(mp, dir_freemap, 150, 160);
+       process_da_freemap(mp, dir_freemap, &holemap);
+
+       return;
+}
+#endif
+
+
+/*
+ * walk tree from root to the left-most leaf block reading in
+ * blocks and setting up cursor.  passes back file block number of the
+ * left-most leaf block if successful (rbno).  returns 1 if successful,
+ * 0 if unsuccessful.  on failure the buffers that had been attached
+ * to the cursor on the way down are released again and their cursor
+ * slots are reset to NULL.
+ *
+ * whichfork only selects the wording of the warnings (directory
+ * vs. attribute fork).
+ */
+int
+traverse_int_dablock(xfs_mount_t       *mp,
+               da_bt_cursor_t          *da_cursor,
+               xfs_dablk_t             *rbno,
+               int                     whichfork)
+{
+       xfs_dablk_t             bno;
+       int                     i;
+       int                     level;
+       xfs_da_intnode_t        *node;
+       xfs_dfsbno_t            fsbno;
+       xfs_buf_t               *bp;
+
+       /*
+        * traverse down left-side of tree until we hit the
+        * left-most leaf block setting up the btree cursor along
+        * the way.  i is the level of the last block attached to
+        * the cursor, -1 until the root block has been accepted.
+        */
+       bno = 0;
+       i = -1;
+       da_cursor->active = 0;
+
+       do {
+               /*
+                * read in each block along the way and set up cursor
+                */
+               fsbno = blkmap_get(da_cursor->blkmap, bno);
+
+               if (fsbno == NULLDFSBNO)
+                       goto error_out;
+
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+               if (!bp) {
+                       if (whichfork == XFS_DATA_FORK)
+                               do_warn("can't read block %u (fsbno %llu) for "
+                                       "directory inode %llu\n",
+                                       bno, fsbno, da_cursor->ino);
+                       else
+                               do_warn("can't read block %u (fsbno %llu) for "
+                                       "attribute fork of inode %llu\n",
+                                       bno, fsbno, da_cursor->ino);
+                       goto error_out;
+               }
+
+               node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+
+               if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+                       do_warn("bad dir/attr magic number in inode %llu, file "
+                               "bno = %u, fsbno = %llu\n", da_cursor->ino, bno, fsbno);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+               if (INT_GET(node->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp))  {
+                       do_warn("bad record count in inode %llu, count = %d, max = %d\n",
+                               da_cursor->ino, INT_GET(node->hdr.count, ARCH_CONVERT),
+                               XFS_DA_NODE_ENTRIES(mp));
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               /*
+                * maintain level counter
+                */
+               level = INT_GET(node->hdr.level, ARCH_CONVERT);
+               if (i == -1)  {
+                       /*
+                        * the root level comes straight off the disk and
+                        * is used to index da_cursor->level[] below, so it
+                        * must be range-checked first.  a node block is
+                        * never at level 0 (that is the leaf level).
+                        * NOTE(review): the upper bound assumes level[]
+                        * holds XFS_DA_NODE_MAXDEPTH entries -- confirm
+                        * against the da_bt_cursor_t declaration.
+                        */
+                       if (level < 1 || level >= XFS_DA_NODE_MAXDEPTH)  {
+                               do_warn("bad btree depth %d in inode %llu, file "
+                                       "bno = %u, fsbno = %llu\n",
+                                       level, da_cursor->ino, bno, fsbno);
+                               libxfs_putbuf(bp);
+                               goto error_out;
+                       }
+                       i = da_cursor->active = level;
+               } else if (level == i - 1)  {
+                       i--;
+               } else  {
+                       if (whichfork == XFS_DATA_FORK)
+                               do_warn("bad directory btree for directory "
+                                       "inode %llu\n", da_cursor->ino);
+                       else
+                               do_warn("bad attribute fork btree for "
+                                       "inode %llu\n", da_cursor->ino);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               da_cursor->level[i].hashval =
+                               INT_GET(node->btree[0].hashval, ARCH_CONVERT);
+               da_cursor->level[i].bp = bp;
+               da_cursor->level[i].bno = bno;
+               da_cursor->level[i].index = 0;
+#ifdef XR_DIR_TRACE
+               da_cursor->level[i].n = XFS_BUF_TO_DA_INTNODE(bp);
+#endif
+
+               /*
+                * set up new bno for next level down
+                */
+               bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+       } while (i > 1);
+
+       /*
+        * now return block number and get out
+        */
+       *rbno = da_cursor->level[0].bno = bno;
+       return(1);
+
+error_out:
+       /*
+        * levels i .. active are the ones whose buffers were attached
+        * to the cursor before the failure (none if i is still -1).
+        * clear the slots so nobody releases the same buffer twice.
+        */
+       while (i > 1 && i <= da_cursor->active)  {
+               libxfs_putbuf(da_cursor->level[i].bp);
+               da_cursor->level[i].bp = NULL;
+               i++;
+       }
+
+       return(0);
+}
+
+/*
+ * blow out the buffers for the level above prev_level and for every
+ * level above that, up to cursor->active.  error says whether held
+ * buffers are expected: with error == 0 finding one is a mistake (it
+ * is warned about and trips the ASSERT), with error == 1 we are in an
+ * error-handling path and leftover buffers are simply released.
+ */
+void
+release_da_cursor_int(xfs_mount_t      *mp,
+                       da_bt_cursor_t  *cursor,
+                       int             prev_level,
+                       int             error)
+{
+       int     level = prev_level;
+
+       /*
+        * the first level is always visited, even when it already
+        * lies at or above cursor->active
+        */
+       do {
+               level++;
+
+               if (cursor->level[level].bp == NULL)
+                       continue;
+
+               if (!error)  {
+                       do_warn("release_da_cursor_int got unexpected non-null bp, "
+                               "dabno = %u\n", cursor->level[level].bno);
+               }
+               ASSERT(error != 0);
+
+               libxfs_putbuf(cursor->level[level].bp);
+               cursor->level[level].bp = NULL;
+       } while (level < cursor->active);
+
+       return;
+}
+
+/*
+ * release the cursor levels above prev_level on the normal path,
+ * where no buffer is expected to be held any more
+ */
+void
+release_da_cursor(xfs_mount_t  *mp,
+               da_bt_cursor_t  *cursor,
+               int             prev_level)
+{
+       const int       in_error_path = 0;
+
+       release_da_cursor_int(mp, cursor, prev_level, in_error_path);
+}
+
+/*
+ * release the cursor levels above prev_level while handling an
+ * error, where buffers may legitimately still be held
+ */
+void
+err_release_da_cursor(xfs_mount_t      *mp,
+                       da_bt_cursor_t  *cursor,
+                       int             prev_level)
+{
+       const int       in_error_path = 1;
+
+       release_da_cursor_int(mp, cursor, prev_level, in_error_path);
+}
+
+/*
+ * like traverse_int_dablock only it does far less checking
+ * and doesn't maintain the cursor.  Just gets you to the
+ * leftmost block in the directory.  returns the fsbno
+ * of that block if successful, NULLDFSBNO if not.
+ */
+xfs_dfsbno_t
+get_first_dblock_fsbno(xfs_mount_t     *mp,
+                       xfs_ino_t       ino,
+                       xfs_dinode_t    *dino)
+{
+       xfs_dablk_t             dabno = 0;
+       int                     depth = -1;
+       xfs_da_intnode_t        *nodep;
+       xfs_dfsbno_t            fsbno;
+       xfs_buf_t               *bp;
+
+       /*
+        * map file block 0 of the data fork first
+        */
+       fsbno = get_bmapi(mp, dino, ino, dabno, XFS_DATA_FORK);
+
+       if (fsbno == NULLDFSBNO)  {
+               do_warn("bmap of block #%u of inode %llu failed\n",
+                       dabno, ino);
+               return(fsbno);
+       }
+
+       /*
+        * a directory no bigger than XFS_LBSIZE(mp) is answered
+        * with block 0 directly, without reading anything
+        */
+       if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp))
+               return(fsbno);
+
+       /*
+        * walk down left side of btree, release buffers as you
+        * go.  depth is taken from the level recorded in the first
+        * node read and counts the steps still to be made; it is
+        * not compared with the levels of the nodes further down.
+        */
+       for (;;)  {
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+               if (bp == NULL)  {
+                       do_warn("can't read block %u (fsbno %llu) for directory "
+                               "inode %llu\n", dabno, fsbno, ino);
+                       return(NULLDFSBNO);
+               }
+
+               nodep = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+
+               if (INT_GET(nodep->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+                       do_warn("bad dir/attr magic number in inode %llu, file "
+                               "bno = %u, fsbno = %llu\n", ino, dabno, fsbno);
+                       libxfs_putbuf(bp);
+                       return(NULLDFSBNO);
+               }
+
+               if (depth == -1)
+                       depth = INT_GET(nodep->hdr.level, ARCH_CONVERT);
+
+               /*
+                * pick up the leftmost child before letting go
+                * of the buffer
+                */
+               dabno = INT_GET(nodep->btree[0].before, ARCH_CONVERT);
+               libxfs_putbuf(bp);
+
+               fsbno = get_bmapi(mp, dino, ino, dabno, XFS_DATA_FORK);
+
+               if (fsbno == NULLDFSBNO)  {
+                       do_warn("bmap of block #%u of inode %llu failed\n", dabno, ino);
+                       return(NULLDFSBNO);
+               }
+
+               if (--depth <= 0)
+                       break;
+       }
+
+       return(fsbno);
+}
+
+/*
+ * make sure that all entries in all blocks along the right side
+ * of the tree are used and hashval's are consistent.  p_level is the
+ * level of the descendent block.  returns 0 if good (even if it had
+ * to be fixed up), and 1 if bad.  The right edge of the tree is
+ * technically a block boundary.  this routine should be used then
+ * instead of verify_da_path().
+ *
+ * each level that checks out has its buffer written (if dirty and
+ * modifications are allowed) or released, and its cursor bp is set to
+ * NULL; the routine then recurses one level up until cursor->active
+ * is reached.  when 1 is returned the buffer of the failing level is
+ * left attached to the cursor.
+ */
+int
+verify_final_da_path(xfs_mount_t       *mp,
+               da_bt_cursor_t          *cursor,
+               const int               p_level)
+{
+       xfs_da_intnode_t        *node;
+       int                     bad = 0;
+       int                     entry;
+       int                     this_level = p_level + 1;
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "in verify_final_da_path, this_level = %d\n",
+               this_level);
+#endif
+       /*
+        * the index should point to the next "unprocessed" entry
+        * in the block which should be the final (rightmost) entry
+        */
+       entry = cursor->level[this_level].index;
+       node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp);
+       /*
+        * check internal block consistency on this level -- ensure
+        * that all entries are used, encountered and expected hashvals
+        * match, etc.
+        */
+       if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1)  {
+               do_warn("directory/attribute block used/count inconsistency - %d/%hu\n",
+                       entry, INT_GET(node->hdr.count, ARCH_CONVERT));
+               bad++;
+       }
+       /*
+        * hash values monotonically increasing ???
+        */
+       if (cursor->level[this_level].hashval >=
+                               INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) {
+               do_warn("directory/attribute block hashvalue inconsistency, "
+                       "expected > %u / saw %u\n", cursor->level[this_level].hashval,
+                       INT_GET(node->btree[entry].hashval, ARCH_CONVERT));
+               bad++;
+       }
+       /*
+        * the rightmost block of a level has no forward sibling
+        */
+       if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0)  {
+               do_warn("bad directory/attribute forward block pointer, expected 0, "
+                       "saw %u\n", INT_GET(node->hdr.info.forw, ARCH_CONVERT));
+               bad++;
+       }
+       if (bad) {
+               do_warn("bad directory block in dir ino %llu\n", cursor->ino);
+               return(1);
+       }
+       /*
+        * keep track of greatest block # -- that gets
+        * us the length of the directory
+        */
+       if (cursor->level[this_level].bno > cursor->greatest_bno)
+               cursor->greatest_bno = cursor->level[this_level].bno;
+
+       /*
+        * ok, now check descendant block number against this level
+        */
+       if (cursor->level[p_level].bno !=
+                       INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+#ifdef XR_DIR_TRACE
+               fprintf(stderr, "bad directory btree pointer, child bno should be %d, "
+                       "block bno is %d, hashval is %u\n",
+                       INT_GET(node->btree[entry].before, ARCH_CONVERT),
+                       cursor->level[p_level].bno,
+                       cursor->level[p_level].hashval);
+               fprintf(stderr, "verify_final_da_path returns 1 (bad) #1a\n");
+#endif
+               return(1);
+       }
+
+       if (cursor->level[p_level].hashval !=
+                               INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) {
+               if (!no_modify)  {
+                       do_warn("correcting bad hashval in non-leaf dir/attr block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+                       INT_SET(node->btree[entry].hashval, ARCH_CONVERT,
+                               cursor->level[p_level].hashval);
+                       cursor->level[this_level].dirty++;
+               } else  {
+                       do_warn("would correct bad hashval in non-leaf dir/attr "
+                               "block\n\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+               }
+       }
+
+       /*
+        * set hashvalue to correctly reflect the now-validated
+        * last entry in this block.  node points into the buffer's
+        * data, so this has to happen before the buffer is written
+        * or released below.  the root level (the bail-out case
+        * further down) keeps its hashval untouched.
+        */
+       if (this_level < cursor->active)
+               cursor->level[this_level].hashval =
+                       INT_GET(node->btree[entry].hashval, ARCH_CONVERT);
+
+       /*
+        * release/write buffer
+        */
+       ASSERT(cursor->level[this_level].dirty == 0 ||
+               (cursor->level[this_level].dirty && !no_modify));
+
+       if (cursor->level[this_level].dirty && !no_modify)
+               libxfs_writebuf(cursor->level[this_level].bp, 0);
+       else
+               libxfs_putbuf(cursor->level[this_level].bp);
+
+       cursor->level[this_level].bp = NULL;
+
+       /*
+        * bail out if this is the root block (top of tree)
+        */
+       if (this_level >= cursor->active)  {
+#ifdef XR_DIR_TRACE
+               fprintf(stderr, "verify_final_da_path returns 0 (ok)\n");
+#endif
+               return(0);
+       }
+       /*
+        * continue upwards validation
+        */
+       return(verify_final_da_path(mp, cursor, this_level));
+}
+
+/*
+ * Verifies the path from a descendant block up to the root.
+ * Should be called when the descendant level traversal hits
+ * a block boundary before crossing the boundary (reading in a new
+ * block).
+ *
+ * the directory/attr btrees work differently to the other fs btrees.
+ * each interior block contains records that are <hashval, bno>
+ * pairs.  The bno is a file bno, not a filesystem bno.  The last
+ * hashvalue in the block <bno> will be <hashval>.  BUT unlike
+ * the freespace btrees, the *last* value in each block gets
+ * propagated up the tree instead of the first value in each block.
+ * that is, the interior records point to child blocks and the *greatest*
+ * hash value contained by the child block is the one the block above
+ * uses as the key for the child block.
+ *
+ * level is the level of the descendent block.  returns 0 if good,
+ * and 1 if bad.  The descendant block may be a leaf block.
+ *
+ * the invariant here is that the values in the cursor for the
+ * levels beneath this level (this_level) and the cursor index
+ * for this level *must* be valid.
+ *
+ * that is, the hashval/bno info is accurate for all
+ * DESCENDANTS and match what the node[index] information
+ * for the current index in the cursor for this level.
+ *
+ * the index values in the cursor for the descendant level
+ * are allowed to be off by one as they will reflect the
+ * next entry at those levels to be processed.
+ *
+ * the hashvalue for the current level can't be set until
+ * we hit the last entry in the block so, it's garbage
+ * until set by this routine.
+ *
+ * bno and bp for the current block/level are always valid
+ * since they have to be set so we can get a buffer for the
+ * block.
+ */
+int
+verify_da_path(xfs_mount_t     *mp,
+       da_bt_cursor_t          *cursor,
+       const int               p_level)
+{
+       xfs_da_intnode_t        *node;          /* block held at this_level */
+       xfs_da_intnode_t        *newnode;       /* its forward sibling */
+       xfs_dfsbno_t            fsbno;          /* fs block that dabno maps to */
+       xfs_dablk_t             dabno;          /* forward sibling's block # */
+       xfs_buf_t               *bp;            /* buffer for the sibling */
+       int                     bad;            /* # of failed sibling checks */
+       int                     entry;          /* index into node->btree[] */
+       int                     this_level = p_level + 1;       /* parent of p_level */
+
+       /*
+        * checks that the current entry of the block held at
+        * this_level (the parent of p_level) refers to the block
+        * recorded in the cursor for p_level and carries the same
+        * hashval.  a hashval mismatch is corrected in the parent
+        * (or only reported if no_modify is set); a block number
+        * mismatch is fatal.  returns 0 if the path is ok, 1 if
+        * it is not.
+        *
+        * index is currently set to point to the entry that
+        * should be processed now in this level.
+        */
+       entry = cursor->level[this_level].index;
+       node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp);
+
+       /*
+        * if this block is out of entries, validate this
+        * block and move on to the next block.
+        * and update cursor value for said level
+        */
+       if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT))  {
+               /*
+                * update the hash value for this level before
+                * validating it.  bno value should be ok since
+                * it was set when the block was first read in.
+                *
+                * NOTE(review): if hdr.count is 0 then entry - 1
+                * indexes btree[-1] here -- confirm that empty node
+                * blocks are rejected before this routine is reached.
+                */
+               cursor->level[this_level].hashval = 
+                               INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT);
+
+               /*
+                * keep track of greatest block # -- that gets
+                * us the length of the directory
+                */
+               if (cursor->level[this_level].bno > cursor->greatest_bno)
+                       cursor->greatest_bno = cursor->level[this_level].bno;
+
+               /*
+                * validate the path for the current used-up block
+                * before we trash it.  this recurses one level up:
+                * this_level plays the part of p_level in the call.
+                *
+                * NOTE(review): nothing in this routine bounds
+                * this_level against the size of cursor->level[];
+                * presumably the root block is never used up while
+                * there is still work to do -- confirm.
+                */
+               if (verify_da_path(mp, cursor, this_level))
+                       return(1);
+               /*
+                * ok, now get the next buffer and check sibling pointers
+                *
+                * NOTE(review): forw comes straight from the on-disk
+                * block; a zero value trips the ASSERT below instead
+                * of taking the "return 1" error path -- confirm that
+                * this is intended.
+                */
+               dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT);
+               ASSERT(dabno != 0);
+               fsbno = blkmap_get(cursor->blkmap, dabno);
+
+               if (fsbno == NULLDFSBNO) {
+                       do_warn("can't get map info for block %u of directory "
+                               "inode %llu\n", dabno, cursor->ino);
+                       return(1);
+               }
+
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+               if (!bp) {
+                       do_warn("can't read block %u (%llu) for directory inode %llu\n",
+                               dabno, fsbno, cursor->ino);
+                       return(1);
+               }
+
+               newnode = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
+               /*
+                * verify magic number and back pointer, sanity-check
+                * entry count, verify level.  every failed check is
+                * reported; any failure releases the new buffer and
+                * fails the path.
+                */
+               bad = 0;
+               if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+                       do_warn("bad magic number %x in block %u (%llu) for directory "
+                               "inode %llu\n",
+                               INT_GET(newnode->hdr.info.magic, ARCH_CONVERT),
+                               dabno, fsbno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) !=
+                                               cursor->level[this_level].bno)  {
+                       do_warn("bad back pointer in block %u (%llu) for directory "
+                               "inode %llu\n", dabno, fsbno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.count, ARCH_CONVERT) >
+                                               XFS_DA_NODE_ENTRIES(mp))  {
+                       do_warn("entry count %d too large in block %u (%llu) for "
+                               "directory inode %llu\n",
+                               INT_GET(newnode->hdr.count, ARCH_CONVERT),
+                               dabno, fsbno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level)  {
+                       do_warn("bad level %d in block %u (%llu) for directory inode "
+                               "%llu\n", INT_GET(newnode->hdr.level, ARCH_CONVERT),
+                               dabno, fsbno, cursor->ino);
+                       bad++;
+               }
+               if (bad)  {
+#ifdef XR_DIR_TRACE
+                       fprintf(stderr, "verify_da_path returns 1 (bad) #4\n");
+#endif
+                       libxfs_putbuf(bp);
+                       return(1);
+               }
+               /*
+                * update cursor, write out the *current* level if
+                * required.  don't write out the descendant level.
+                * the used-up buffer is written only if it is dirty
+                * and no_modify is clear, otherwise it is simply
+                * released; the cursor then takes over the new buffer.
+                */
+               ASSERT(cursor->level[this_level].dirty == 0 ||
+                       cursor->level[this_level].dirty && !no_modify);
+
+               if (cursor->level[this_level].dirty && !no_modify)
+                       libxfs_writebuf(cursor->level[this_level].bp, 0);
+               else
+                       libxfs_putbuf(cursor->level[this_level].bp);
+               cursor->level[this_level].bp = bp;
+               cursor->level[this_level].dirty = 0;
+               cursor->level[this_level].bno = dabno;
+               cursor->level[this_level].hashval =
+                       INT_GET(newnode->btree[0].hashval, ARCH_CONVERT);       /* replaced by the last entry's hashval once this block is used up */
+#ifdef XR_DIR_TRACE
+               cursor->level[this_level].n = newnode;
+#endif
+               node = newnode;
+
+               entry = cursor->level[this_level].index = 0;
+       }
+       /*
+        * the block number recorded in the cursor for the
+        * descendant level must be the block that the current
+        * entry points at (its "before" field).  a mismatch is
+        * not repaired here, the path is simply declared bad.
+        */
+       if (cursor->level[p_level].bno !=
+                       INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+#ifdef XR_DIR_TRACE
+               fprintf(stderr, "bad directory btree pointer, child bno should be %d, "
+                       "block bno is %d, hashval is %u\n",
+                       INT_GET(node->btree[entry].before, ARCH_CONVERT),
+                       cursor->level[p_level].bno,
+                       cursor->level[p_level].hashval);
+               fprintf(stderr, "verify_da_path returns 1 (bad) #1a\n");
+#endif
+               return(1);
+       }
+       /*
+        * ok, now validate last hashvalue in the descendant
+        * block against the hashval in the current entry.
+        * the descendant's value wins: the entry is rewritten
+        * and this level is marked dirty, unless no_modify is
+        * set, in which case we only say what would be done.
+        */
+       if (cursor->level[p_level].hashval !=
+                       INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+               if (!no_modify)  {
+                       do_warn("correcting bad hashval in interior dir/attr block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+                       INT_SET(node->btree[entry].hashval, ARCH_CONVERT,
+                               cursor->level[p_level].hashval);
+                       cursor->level[this_level].dirty++;
+               } else  {
+                       do_warn("would correct bad hashval in interior dir/attr "
+                               "block\n\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+               }
+       }
+       /*
+        * increment index for this level to point to next entry
+        * (which should point to the next descendant block)
+        */
+       cursor->level[this_level].index++;
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "verify_da_path returns 0 (ok)\n");
+#endif
+       return(0);
+}
+
+#if 0
+/*
+ * handles junking directory leaf block entries that have zero lengths.
+ * a zero-length entry is converted in place into a one-character
+ * "bad" entry (name "/", inode number NULLFSINO) if one of the
+ * freemap holes is big enough to hold an xfs_dir_leaf_name_t.
+ * otherwise the entry is removed from the entry table and the
+ * table slot it occupied is accounted for in the freemap.
+ *
+ * mp is only used by the sanity ASSERTs, leaf is the block that
+ * is fixed up in place, and ino is only used in the warnings.
+ *
+ * buf_dirty is an in/out, set to 1 if the leaf was modified.
+ * we do NOT initialize it to zero if nothing happened because it
+ * may be already set by the caller.  Assumes that the block
+ * has been compacted before calling this routine.
+ */
+void
+junk_zerolen_dir_leaf_entries(
+       xfs_mount_t             *mp,
+       xfs_dir_leafblock_t     *leaf,
+       xfs_ino_t               ino,
+       int                     *buf_dirty)
+{
+       xfs_dir_leaf_entry_t    *entry;
+       xfs_dir_leaf_name_t     *namest;
+       xfs_dir_leaf_hdr_t      *hdr;
+       xfs_dir_leaf_map_t      *map;
+       xfs_ino_t               tmp_ino;
+       int                     bytes;
+       int                     current_hole = 0;
+       int                     i;
+       int                     j;
+       int                     tmp;
+       int                     start;
+       int                     before;
+       int                     after;
+       int                     smallest;
+       int                     tablesize;
+
+       entry = &leaf->entries[0];
+       hdr = &leaf->hdr;
+
+       /*
+        * we can convert the entries to one character entries
+        * as long as we have space.  Once we run out, then
+        * we have to really delete (copy over) an entry.
+        * however, that frees up some space that we could use ...
+        *
+        * so the idea is, we'll use up space from all the holes,
+        * potentially leaving each hole too small to do any good.
+        * then if need to, we'll delete entries and use that space
+        * up from the top-most byte down.  that may leave a 4th hole
+        * but we can represent that by correctly setting the value
+        * of firstused.  that leaves any hole between the end of
+        * the entry list and firstused so it doesn't have to be
+        * recorded in the hole map.
+        */
+
+       for (bytes = i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
+               /*
+                * skip over entries that are good or already converted
+                */
+               if (entry->namelen != 0)
+                       continue;
+
+               *buf_dirty = 1;
+#if 0
+               /*
+                * try and use up existing holes first until they get
+                * too small, then set bytes to the # of bytes between
+                * the current heap beginning and the last used byte
+                * in the entry table.
+                */
+               if (bytes < sizeof(xfs_dir_leaf_name_t) &&
+                               current_hole < XFS_DIR_LEAF_MAPSIZE)  {
+                       /*
+                        * skip over holes that are too small
+                        */
+                       while (current_hole < XFS_DIR_LEAF_MAPSIZE &&
+                               INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT) <
+                                       sizeof(xfs_dir_leaf_name_t))  {
+                               current_hole++;
+                       }
+
+                       if (current_hole < XFS_DIR_LEAF_MAPSIZE)
+                               bytes = INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT);
+                       else
+                               bytes = (int) INT_GET(hdr->firstused, ARCH_CONVERT) -
+                                ((__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] -
+                                 (__psint_t) leaf);
+               }
+#endif
+               /*
+                * find the first hole that can hold a name structure;
+                * current_hole == XFS_DIR_LEAF_MAPSIZE means none can
+                */
+               current_hole = 0;
+
+               for (map = &hdr->freemap[0];
+                               current_hole < XFS_DIR_LEAF_MAPSIZE &&
+                                       INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_name_t);
+                               map++)  {
+                       current_hole++;
+               }
+
+               /*
+                * if we can use an existing hole, do it.  otherwise,
+                * delete entries until the deletions create a big enough
+                * hole to convert another entry.  then use up those
+                * bytes until you run low.  then delete entries again ...
+                */
+               if (current_hole < XFS_DIR_LEAF_MAPSIZE)  {
+                       /*
+                        * the name structure is carved off the tail
+                        * of the hole, so we need the hole's size
+                        * (this used to be left at zero, which broke
+                        * both the ASSERT and the nameidx calculation)
+                        */
+                       bytes = INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT);
+                       ASSERT(sizeof(xfs_dir_leaf_name_t) <= bytes);
+
+                       do_warn("marking bad entry in directory inode %llu\n",
+                               ino);
+
+                       entry->namelen = 1;
+                       INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(hdr->freemap[current_hole].base, ARCH_CONVERT) +
+                                       bytes - sizeof(xfs_dir_leaf_name_t));
+
+                       namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+                       tmp_ino = NULLFSINO;
+                       XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino, &namest->inumber, ARCH_CONVERT);
+                       namest->name[0] = '/';
+
+                       if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+                               INT_SET(hdr->firstused, ARCH_CONVERT, INT_GET(entry->nameidx, ARCH_CONVERT));
+                       INT_MOD(hdr->freemap[current_hole].size, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t)));
+                       INT_MOD(hdr->namebytes, ARCH_CONVERT, +1);
+               } else  {
+                       /*
+                        * delete the table entry and try and account for the
+                        * space in the holemap.  don't have to update namebytes
+                        * or firstused since we're not actually deleting any
+                        * bytes from the heap.  following code swiped from
+                        * xfs_dir_leaf_remove() in xfs_dir_leaf.c
+                        */
+                       INT_MOD(hdr->count, ARCH_CONVERT, -1);
+                       do_warn(
+                       "deleting zero length entry in directory inode %llu\n",
+                               ino);
+                       /*
+                        * overwrite the bad entry unless it's the
+                        * last entry in the list (highly unlikely).
+                        * zero out the free'd bytes.  count has already
+                        * been decremented, so (count - i) is both the
+                        * number of entries that follow the bad one and
+                        * the distance from it to the vacated last slot.
+                        */
+                       if (INT_GET(hdr->count, ARCH_CONVERT) - i > 0)  {
+                               memmove(entry, entry + 1, (INT_GET(hdr->count, ARCH_CONVERT) - i) *
+                                       sizeof(xfs_dir_leaf_entry_t));
+                       }
+                       bzero((void *) ((__psint_t) entry +
+                               (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+                               sizeof(xfs_dir_leaf_entry_t)),
+                               sizeof(xfs_dir_leaf_entry_t));
+
+                       /*
+                        * start is the offset within the block of the
+                        * table slot that was just vacated, tablesize
+                        * is where the entry table used to end
+                        */
+                       start = (__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] -
+                               (__psint_t) leaf;
+                       tablesize = sizeof(xfs_dir_leaf_entry_t) *
+                               (INT_GET(hdr->count, ARCH_CONVERT) + 1) + sizeof(xfs_dir_leaf_hdr_t);
+                       map = &hdr->freemap[0];
+                       tmp = INT_GET(map->size, ARCH_CONVERT);
+                       before = after = -1;
+                       smallest = XFS_DIR_LEAF_MAPSIZE - 1;
+                       for (j = 0; j < XFS_DIR_LEAF_MAPSIZE; map++, j++) {
+                               ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+                               ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+                               if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
+                                       INT_MOD(map->base, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_entry_t)));
+                                       INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+                               }
+
+                               if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == start) {
+                                       before = j;
+                               } else if (INT_GET(map->base, ARCH_CONVERT) == start +
+                                               sizeof(xfs_dir_leaf_entry_t))  {
+                                       after = j;
+                               } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
+                                       tmp = INT_GET(map->size, ARCH_CONVERT);
+                                       smallest = j;
+                               }
+                       }
+
+                       /*
+                        * Coalesce adjacent freemap regions,
+                        * or replace the smallest region.
+                        */
+                       if ((before >= 0) || (after >= 0)) {
+                               if ((before >= 0) && (after >= 0))  {
+                                       map = &hdr->freemap[before];
+                                       INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+                                       INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
+                                       INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT);
+                                       INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT);
+                               } else if (before >= 0) {
+                                       map = &hdr->freemap[before];
+                                       INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+                               } else {
+                                       map = &hdr->freemap[after];
+                                       INT_SET(map->base, ARCH_CONVERT, start);
+                                       INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+                               }
+                       } else  {
+                               /*
+                                * Replace smallest region
+                                * (if it is smaller than free'd entry)
+                                */
+                               map = &hdr->freemap[smallest];
+                               if (INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_entry_t))  {
+                                       INT_SET(map->base, ARCH_CONVERT, start);
+                                       INT_SET(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t));
+                               }
+                               /*
+                                * mark as needing compaction
+                                */
+                               hdr->holes = 1;
+                       }
+
+                       /*
+                        * the table just shrank by one entry and the
+                        * entry that followed the deleted one now sits
+                        * in the current slot.  back up the loop
+                        * variables so we don't miss it as we walk
+                        * the table.
+                        */
+                       entry--;
+                       i--;
+#if 0
+                       /*
+                        * do we have to delete stuff or is there
+                        * room for deletions?
+                        */
+                       ASSERT(current_hole == XFS_DIR_LEAF_MAPSIZE);
+
+                       /*
+                        * here, bytes == number of unused bytes from
+                        * end of list to top (beginning) of heap
+                        * (firstused).  It's ok to leave extra
+                        * unused bytes in that region because they
+                        * wind up before firstused (which we reset
+                        * appropriately
+                        */
+                       if (bytes < sizeof(xfs_dir_leaf_name_t))  {
+                               /*
+                                * have to delete an entry because
+                                * we have no room to convert it to
+                                * a bad entry
+                                */
+                               do_warn(
+                               "deleting entry in directory inode %llu\n",
+                                       ino);
+                               /*
+                                * overwrite the bad entry unless it's the
+                                * last entry in the list (highly unlikely).
+                                */
+                               if (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1> 0)  {
+                                       memmove(entry, entry + 1,
+                                               (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) *
+                                               sizeof(xfs_dir_leaf_entry_t));
+                               }
+                               bzero((void *) ((__psint_t) entry +
+                                       (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) *
+                                       sizeof(xfs_dir_leaf_entry_t)),
+                                       sizeof(xfs_dir_leaf_entry_t));
+
+                               /*
+                                * bump up free byte count, drop other
+                                * index vars since the table just
+                                * shrank by one entry and we don't
+                                * want to miss any as we walk the table
+                                */
+                               bytes += sizeof(xfs_dir_leaf_entry_t);
+                               INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+                               entry--;
+                               i--;
+                       } else  {
+                               /*
+                                * convert entry using the bytes in between
+                                * the end of the entry table and the heap
+                                */
+                               entry->namelen = 1;
+                               INT_MOD(leaf->hdr.firstused, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t)));
+                               INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(leaf->hdr.firstused, ARCH_CONVERT));
+
+                               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+                                                       INT_GET(entry->nameidx, ARCH_CONVERT));
+                               tmp_ino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino,
+                                                       &namest->inumber, ARCH_CONVERT);
+                               namest->name[0] = '/';
+
+                               bytes -= sizeof(xfs_dir_leaf_entry_t);
+                       }
+#endif
+               }
+       }
+
+       return;
+}
+#endif
+
+static char dirbuf[64 * 1024]; /* 64KB file-local buffer; NOTE(review): its users are not visible in this part of the file -- confirm it is still needed */
+
+/*
+ * called by both node dir and leaf dir processing routines
+ * validates all contents *but* the sibling pointers (forw/back)
+ * and the magic number.
+ *
+ * returns 0 if the directory is ok or has been brought to the
+ * stage that it can be fixed up later (in phase 6),
+ * 1 if it has to be junked.
+ *
+ * Right now we fix a lot of things (TBD == to be deleted).
+ *
+ *     incorrect . entries - inode # is corrected
+ *     entries with mismatched hashvalue/name strings - hashvalue reset
+ *     entries whose hashvalues are out-of-order - entry marked TBD
+ *     .. entries with invalid inode numbers - entry marked TBD
+ *     entries with invalid inode numbers - entry marked TBD
+ *     multiple . entries - all but the first entry are marked TBD
+ *     zero-length entries - entry is deleted
+ *     entries with an out-of-bounds name index ptr - entry is deleted
+ *
+ * entries marked TBD have the first character of their name
+ *     (the name lives in the heap) set to '/' -- an illegal
+ *     value.
+ *
+ * entries deleted right here are deleted by blowing away the entry
+ *     (but leaving the heap untouched).  any space that was used
+ *     by the deleted entry will be reclaimed by the block freespace
+ *     (da_freemap) processing code.
+ *
+ * if two entries claim the same space in the heap (say, due to
+ * bad entry name index pointers), we lose the directory.  We could
+ * try harder to fix this but it'll do for now.
+ */
+/* ARGSUSED */
+int
+process_leaf_dir_block(
+       xfs_mount_t             *mp,
+       xfs_dir_leafblock_t     *leaf,
+       xfs_dablk_t             da_bno,
+       xfs_ino_t               ino, 
+       xfs_dahash_t            last_hashval,   /* last hashval encountered */
+       int                     ino_discovery,
+       blkmap_t                *blkmap,
+       int                     *dot,
+       int                     *dotdot,
+       xfs_ino_t               *parent,
+       int                     *buf_dirty,     /* is buffer dirty? */
+       xfs_dahash_t            *next_hashval)  /* greatest hashval in block */
+{
+       xfs_ino_t                       lino;
+       xfs_dir_leaf_entry_t            *entry;
+       xfs_dir_leaf_entry_t            *s_entry;
+       xfs_dir_leaf_entry_t            *d_entry;
+       xfs_dir_leafblock_t             *new_leaf;
+       char                            *first_byte;
+       xfs_dir_leaf_name_t             *namest;
+       ino_tree_node_t                 *irec_p;
+       int                             num_entries;
+       xfs_dahash_t                    hashval;
+       int                             i;
+       int                             nm_illegal;
+       int                             bytes;
+       int                             start;
+       int                             stop;
+       int                             res = 0;
+       int                             ino_off;
+       int                             first_used;
+       int                             bytes_used;
+       int                             reset_holes;
+       int                             zero_len_entries;
+       char                            fname[MAXNAMELEN + 1];
+       da_hole_map_t                   holemap;
+       da_hole_map_t                   bholemap;
+#if 0
+       unsigned char                   *dir_freemap;
+#endif
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "\tprocess_leaf_dir_block - ino %llu\n", ino);
+#endif
+
+       /*
+        * clear static dir block freespace bitmap
+        */
+       init_da_freemap(dir_freemap);
+
+#if 0
+       /*
+        * XXX - alternatively, do this for parallel usage.
+        * set up block freespace map.  head part of dir leaf block
+        * including all entries are packed so we can use sizeof
+        * and not worry about alignment.
+        */
+
+       if ((dir_freemap = alloc_da_freemap(mp)) == NULL)  {
+               do_error("couldn't allocate directory block freemap\n");
+               abort();
+       }
+#endif
+
+       *buf_dirty = 0;
+       first_used = mp->m_sb.sb_blocksize;
+       zero_len_entries = 0;
+       bytes_used = 0;
+
+       i = stop = sizeof(xfs_dir_leaf_hdr_t);
+       if (set_da_freemap(mp, dir_freemap, 0, stop))  {
+               do_warn(
+"directory block header conflicts with used space in directory inode %llu\n",
+                               ino);
+               return(1);
+       }
+
+       /*
+        * verify structure:  monotonically increasing hash value for
+        * all leaf entries, indexes for all entries must be within
+        * this fs block (trivially true for 64K blocks).  also track
+        * used space so we can check the freespace map.  check for
+        * zero-length entries.  for now, if anything's wrong, we
+        * junk the directory and we'll pick up no-longer referenced
+        * inodes on a later pass.
+        */
+       for (i = 0, entry = &leaf->entries[0];
+                       i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+                       i++, entry++)  {
+               /*
+                * check that the name index isn't out of bounds
+                * if it is, delete the entry since we can't
+                * grab the inode #.
+                */
+               if (INT_GET(entry->nameidx, ARCH_CONVERT) >= mp->m_sb.sb_blocksize)  {
+                       if (!no_modify)  {
+                               *buf_dirty = 1;
+
+                               if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1)  {
+                                       do_warn(
+"nameidx %d for entry #%d, bno %d, ino %llu > fs blocksize, deleting entry\n",
+                                               INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+                                       ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+                                       bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+                                               sizeof(xfs_dir_leaf_entry_t);
+
+                                       /*
+                                        * compress table unless we're
+                                        * only dealing with 1 entry
+                                        * (the last one) in which case
+                                        * just zero it.
+                                        */
+                                       if (bytes >
+                                           sizeof(xfs_dir_leaf_entry_t))  {
+                                               memmove(entry, entry + 1,
+                                                       bytes);
+                                               bzero((void *)
+                                               ((__psint_t) entry + bytes),
+                                               sizeof(xfs_dir_leaf_entry_t));
+                                       } else  {
+                                               bzero(entry,
+                                               sizeof(xfs_dir_leaf_entry_t));
+                                       }
+
+                                       /*
+                                        * sync vars to match smaller table.
+                                        * don't have to worry about freespace
+                                        * map since we haven't set it for
+                                        * this entry yet.
+                                        */
+                                       INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+                                       i--;
+                                       entry--;
+                               } else  {
+                                       do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, marking entry bad\n",
+                                               INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+                                       INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+                                               sizeof(xfs_dir_leaf_name_t));
+                                       namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+                                                       INT_GET(entry->nameidx, ARCH_CONVERT));
+                                       lino = NULLFSINO;
+                                       XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+                                                       &namest->inumber, ARCH_CONVERT);
+                                       namest->name[0] = '/';
+                               }
+                       } else  {
+                               do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, would delete entry\n",
+                                       INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+                       }
+                       continue;
+               }
+               /*
+                * inode processing -- make sure the inode
+                * is in our tree or we add it to the uncertain
+                * list if the inode # is valid.  if namelen is 0,
+                * we can still try for the inode as long as nameidx
+                * is ok.
+                */
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+               XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+
+               /*
+                * we may have to blow out an entry because of bad
+                * inode numbers.  do NOT touch the name until after
+                * we've computed the hashvalue and done a namecheck()
+                * on the name.
+                */
+               if (!ino_discovery && lino == NULLFSINO)  {
+                       /*
+                        * don't do a damned thing.  We already
+                        * found this (or did it ourselves) during
+                        * phase 3.
+                        */
+               } else if (verify_inum(mp, lino))  {
+                       /*
+                        * bad inode number.  clear the inode
+                        * number and the entry will get removed
+                        * later.  We don't trash the directory
+                        * since it's still structurally intact.
+                        */
+                       do_warn(
+"invalid ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+                               lino, ino, i, da_bno);
+                       if (!no_modify)  {
+                               do_warn(
+                               "\tclearing ino number in entry %d...\n", i);
+
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+                               "\twould clear ino number in entry %d...\n", i);
+                       }
+               } else if (lino == mp->m_sb.sb_rbmino)  {
+                       do_warn(
+"entry #%d, bno %d in directory %llu references realtime bitmap inode %llu\n",
+                               i, da_bno, ino, lino);
+                       if (!no_modify)  {
+                               do_warn(
+                               "\tclearing ino number in entry %d...\n", i);
+
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+                               "\twould clear ino number in entry %d...\n", i);
+                       }
+               } else if (lino == mp->m_sb.sb_rsumino)  {
+                       do_warn(
+"entry #%d, bno %d in directory %llu references realtime summary inode %llu\n",
+                               i, da_bno, ino, lino);
+                       if (!no_modify)  {
+                               do_warn(
+                               "\tclearing ino number in entry %d...\n", i);
+
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+                               "\twould clear ino number in entry %d...\n", i);
+                       }
+               } else if (lino == mp->m_sb.sb_uquotino)  {
+                       do_warn(
+"entry #%d, bno %d in directory %llu references user quota inode %llu\n",
+                               i, da_bno, ino, lino);
+                       if (!no_modify)  {
+                               do_warn(
+                               "\tclearing ino number in entry %d...\n", i);
+
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+                               "\twould clear ino number in entry %d...\n", i);
+                       }
+               } else if (lino == mp->m_sb.sb_pquotino)  {
+                       do_warn(
+"entry #%d, bno %d in directory %llu references proj quota inode %llu\n",
+                               i, da_bno, ino, lino);
+                       if (!no_modify)  {
+                               do_warn(
+                               "\tclearing ino number in entry %d...\n", i);
+
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+                               "\twould clear ino number in entry %d...\n", i);
+                       }
+               } else if (lino == old_orphanage_ino)  {
+                       /*
+                        * do nothing, silently ignore it, entry has
+                        * already been marked TBD since old_orphanage_ino
+                        * is set non-zero.
+                        */
+               } else if ((irec_p = find_inode_rec(
+                               XFS_INO_TO_AGNO(mp, lino),
+                               XFS_INO_TO_AGINO(mp, lino))) != NULL)  {
+                       /*
+                        * inode recs should have only confirmed
+                        * inodes in them
+                        */
+                       ino_off = XFS_INO_TO_AGINO(mp, lino) -
+                                       irec_p->ino_startnum;
+                       ASSERT(is_inode_confirmed(irec_p, ino_off));
+                       /*
+                        * if inode is marked free and we're in inode
+                        * discovery mode, leave the entry alone for now.
+                        * if the inode turns out to be used, we'll figure
+                        * that out when we scan it.  If the inode really
+                        * is free, we'll hit this code again in phase 4
+                        * after we've finished inode discovery and blow
+                        * out the entry then.
+                        */
+                       if (!ino_discovery && is_inode_free(irec_p, ino_off))  {
+                               if (!no_modify)  {
+                                       do_warn(
+"entry references free inode %llu in directory %llu, will clear entry\n",
+                                               lino, ino);
+                                       lino = NULLFSINO;
+                                       XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+                                                       &namest->inumber, ARCH_CONVERT);
+                                       *buf_dirty = 1;
+                               } else  {
+                                       do_warn(
+"entry references free inode %llu in directory %llu, would clear entry\n",
+                                               lino, ino);
+                               }
+                       }
+               } else if (ino_discovery)  {
+                       add_inode_uncertain(mp, lino, 0);
+               } else  {
+                       do_warn(
+       "bad ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+                               lino, ino, i, da_bno);
+                       if (!no_modify)  {
+                               do_warn("clearing inode number...\n");
+                               lino = NULLFSINO;
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn("would clear inode number...\n");
+                       }
+               }
+               /*
+                * if we have a zero-length entry, trash it.
+                * we may lose the inode (chunk) if we don't
+                * finish the repair successfully and the inode
+                * isn't mentioned anywhere else (like in the inode
+                * tree) but the alternative is to risk losing the
+                * entire directory by trying to use the next byte
+                * to turn the entry into a 1-char entry.  That's
+                * probably a safe bet but if it didn't work, we'd
+                * lose the entire directory the way we currently do
+                * things.  (Maybe we should change that later :-).
+                */
+               if (entry->namelen == 0)  {
+                       *buf_dirty = 1;
+
+                       if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1)  {
+                               do_warn(
+       "entry #%d, dir inode %llu, has zero-len name, deleting entry\n",
+                                       i, ino);
+                               ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+                               bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+                                       sizeof(xfs_dir_leaf_entry_t);
+
+                               /*
+                                * compress table unless we're
+                                * only dealing with 1 entry
+                                * (the last one) in which case
+                                * just zero it.
+                                */
+                               if (bytes > sizeof(xfs_dir_leaf_entry_t))  {
+                                       memmove(entry, entry + 1,
+                                               bytes);
+                                       bzero((void *)
+                                               ((__psint_t) entry + bytes),
+                                               sizeof(xfs_dir_leaf_entry_t));
+                               } else  {
+                                       bzero(entry,
+                                               sizeof(xfs_dir_leaf_entry_t));
+                               }
+
+                               /*
+                                * sync vars to match smaller table.
+                                * don't have to worry about freespace
+                                * map since we haven't set it for
+                                * this entry yet.
+                                */
+                               INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+                               i--;
+                               entry--;
+                       } else  {
+                               /*
+                                * if it's the only entry, preserve the
+                                * inode number for now
+                                */
+                               do_warn(
+       "entry #%d, dir inode %llu, has zero-len name, marking entry bad\n",
+                                       i, ino);
+                               INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+                                               sizeof(xfs_dir_leaf_name_t));
+                               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+                                               INT_GET(entry->nameidx, ARCH_CONVERT));
+                               XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+                               namest->name[0] = '/';
+                       }
+               } else if (INT_GET(entry->nameidx, ARCH_CONVERT) + entry->namelen > XFS_LBSIZE(mp))  {
+                       do_warn(
+"bad size, entry #%d in dir inode %llu, block %u -- entry overflows block\n",
+                       i, ino, da_bno);
+
+                       return(1);
+               }
+
+               start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;;
+               stop = start + sizeof(xfs_dir_leaf_entry_t);
+
+               if (set_da_freemap(mp, dir_freemap, start, stop))  {
+                       do_warn(
+"dir entry slot %d in block %u conflicts with used space in dir inode %llu\n",
+                               i, da_bno, ino);
+                       return(1);
+               }
+
+               /*
+                * check if the name is legal.  if so, then
+                * check that the name and hashvalues match.
+                *
+                * if the name is illegal, we don't check the
+                * hashvalue computed from it.  we just make
+                * sure that the hashvalue in the entry is
+                * monotonically increasing with respect to the previous
+                * entry.
+                *
+                * Note that we do NOT have to check the length
+                * because the length is stored in a one-byte
+                * unsigned int which max's out at MAXNAMELEN
+                * making it impossible for the stored length
+                * value to be out of range.
+                */
+               bcopy(namest->name, fname, entry->namelen);
+               fname[entry->namelen] = '\0';
+               hashval = libxfs_da_hashname(fname, entry->namelen);
+
+               /*
+                * only complain about illegal names in phase 3 (when
+                * inode discovery is turned on).  Otherwise, we'd complain
+                * a lot during phase 4.  If the name is illegal, leave
+                * the hash value in that entry alone.
+                */
+               nm_illegal = namecheck(fname, entry->namelen);
+
+               if (ino_discovery && nm_illegal)  {
+                       /*
+                        * junk the entry, illegal name
+                        */
+                       if (!no_modify)  {
+                               do_warn(
+       "illegal name \"%s\" in directory inode %llu, entry will be cleared\n",
+                                       fname, ino);
+                               namest->name[0] = '/';
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+       "illegal name \"%s\" in directory inode %llu, entry would be cleared\n",
+                                       fname, ino);
+                       }
+               } else if (!nm_illegal && INT_GET(entry->hashval, ARCH_CONVERT) != hashval)  {
+                       /*
+                        * try resetting the hashvalue to the correct
+                        * value for the string, if the string has been
+                        * corrupted, too, that will get picked up next
+                        */
+                       do_warn("\tmismatched hash value for entry \"%s\"\n",
+                               fname);
+                       if (!no_modify)  {
+                               do_warn(
+                       "\t\tin directory inode %llu.  resetting hash value.\n",
+                                       ino);
+                               INT_SET(entry->hashval, ARCH_CONVERT, hashval);
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+               "\t\tin directory inode %llu.  would reset hash value.\n",
+                                       ino);
+                       }
+               }
+               
+               /*
+                * now we can mark entries with NULLFSINO's bad
+                */
+               if (!no_modify && lino == NULLFSINO)  {
+                       namest->name[0] = '/';
+                       *buf_dirty = 1;
+               }
+
+               /*
+                * regardless of whether the entry has or hasn't been
+                * marked for deletion, the hash value ordering must
+                * be maintained.
+                */
+               if (INT_GET(entry->hashval, ARCH_CONVERT) < last_hashval)  {
+                       /*
+                        * blow out the entry -- set hashval to sane value
+                        * and set the first character in the string to
+                        * the illegal value '/'.  Reset the hash value
+                        * to the last hashvalue so that verify_da_path
+                        * will fix up the interior pointers correctly.
+                        * the entry will be deleted later (by routines
+                        * that need only the entry #).  We keep the
+                        * inode number in the entry so we can attach
+                        * the inode to the orphanage later.
+                        */
+                       do_warn("\tbad hash ordering for entry \"%s\"\n",
+                               fname);
+                       if (!no_modify)  {
+                               do_warn(
+               "\t\tin directory inode %llu.  will clear entry\n",
+                                       ino);
+                               INT_SET(entry->hashval, ARCH_CONVERT, last_hashval);
+                               namest->name[0] = '/';
+                               *buf_dirty = 1;
+                       } else  {
+                               do_warn(
+               "\t\tin directory inode %llu.  would clear entry\n",
+                                       ino);
+                       }
+               }
+
+               *next_hashval = last_hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+
+               /*
+                * if heap data conflicts with something,
+                * blow it out and skip the rest of the loop
+                */
+               if (set_da_freemap(mp, dir_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+                               INT_GET(entry->nameidx, ARCH_CONVERT) + sizeof(xfs_dir_leaf_name_t) +
+                               entry->namelen - 1))  {
+                       do_warn(
+"name \"%s\" (block %u, slot %d) conflicts with used space in dir inode %llu\n",
+                               fname, da_bno, i, ino);
+                       if (!no_modify)  {
+                               entry->namelen = 0;
+                               *buf_dirty = 1;
+
+                               do_warn(
+               "will clear entry \"%s\" (#%d) in directory inode %llu\n",
+                                       fname, i, ino);
+                       } else  {
+                               do_warn(
+               "would clear entry \"%s\" (#%d)in directory inode %llu\n",
+                                       fname, i, ino);
+                       }
+                       continue;
+               }
+
+               /*
+                * keep track of heap stats (first byte used, total bytes used)
+                */
+               if (INT_GET(entry->nameidx, ARCH_CONVERT) < first_used)
+                       first_used = INT_GET(entry->nameidx, ARCH_CONVERT);
+               bytes_used += entry->namelen;
+
+               /*
+                * special . or .. entry processing
+                */
+               if (entry->namelen == 2 && namest->name[0] == '.' &&
+                                               namest->name[1] == '.') {
+                       /*
+                        * the '..' case
+                        */
+                       if (!*dotdot) {
+                               (*dotdot)++;
+                               *parent = lino;
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_leaf_dir_block found .. entry (parent) = %llu\n", lino);
+#endif
+                               /*
+                                * what if .. == .?  legal only in
+                                * the root inode.  blow out entry
+                                * and set parent to NULLFSINO otherwise.
+                                */
+                               if (ino == lino &&
+                                               ino != mp->m_sb.sb_rootino)  {
+                                       *parent = NULLFSINO;
+                                       do_warn(
+       "bad .. entry in dir ino %llu, points to self",
+                                               ino);
+                                       if (!no_modify)  {
+                                               do_warn("will clear entry\n");
+
+                                               namest->name[0] = '/';
+                                               *buf_dirty = 1;
+                                       } else  {
+                                               do_warn("would clear entry\n");
+                                       }
+                               } else if (ino != lino &&
+                                               ino == mp->m_sb.sb_rootino)  {
+                                       /*
+                                        * we have to make sure that . == ..
+                                        * in the root inode
+                                        */
+                                       if (!no_modify)  {
+                                               do_warn(
+               "correcting .. entry in root inode %llu, was %llu\n",
+                                                       ino, *parent);
+                                               XFS_DIR_SF_PUT_DIRINO_ARCH(
+                                                       &ino,
+                                               &namest->inumber, ARCH_CONVERT);
+                                               *buf_dirty = 1;
+                                       } else  {
+                                               do_warn(
+       "bad .. entry (%llu) in root inode %llu should be %llu\n",
+                                                       *parent,
+                                                       ino, ino);
+                                       }
+                               }
+                       } else  {
+                               /*
+                                * can't fix the directory unless we know
+                                * which .. entry is the right one.  Both
+                                * have valid inode numbers, match the hash
+                                * value and the hash values are ordered
+                                * properly or we wouldn't be here.  So
+                                * since both seem equally valid, trash
+                                * this one.
+                                */
+                               if (!no_modify)  {
+                                       do_warn(
+"multiple .. entries in directory inode %llu, will clear second entry\n",
+                                               ino);
+                                       namest->name[0] = '/';
+                                       *buf_dirty = 1;
+                               } else  {
+                                       do_warn(
+"multiple .. entries in directory inode %llu, would clear second entry\n",
+                                               ino);
+                               }
+                       }
+               } else if (entry->namelen == 1 && namest->name[0] == '.')  {
+                       /*
+                        * the '.' case
+                        */
+                       if (!*dot)  {
+                               (*dot)++;
+                               if (lino != ino)  {
+                                       if (!no_modify)  {
+                                               do_warn(
+       ". in directory inode %llu has wrong value (%llu), fixing entry...\n",
+                                                       ino, lino);
+                                               XFS_DIR_SF_PUT_DIRINO_ARCH(&ino,
+                                                       &namest->inumber, ARCH_CONVERT);
+                                               *buf_dirty = 1;
+                                       } else  {
+                                               do_warn(
+                       ". in directory inode %llu has wrong value (%llu)\n",
+                                                       ino, lino);
+                                       }
+                               }
+                       } else  {
+                               do_warn(
+                               "multiple . entries in directory inode %llu\n",
+                                       ino);
+                               /*
+                                * mark entry as to be junked.
+                                */
+                               if (!no_modify)  {
+                                       do_warn(
+                       "will clear one . entry in directory inode %llu\n",
+                                               ino);
+                                       namest->name[0] = '/';
+                                       *buf_dirty = 1;
+                               } else  {
+                                       do_warn(
+                       "would clear one . entry in directory inode %llu\n",
+                                               ino);
+                               }
+                       }
+               } else  {
+                       /*
+                        * all the rest -- make sure only . references self
+                        */
+                       if (lino == ino)  {
+                               do_warn(
+                       "entry \"%s\" in directory inode %llu points to self, ",
+                                       fname, ino);
+                               if (!no_modify)  {
+                                       do_warn("will clear entry\n");
+                                       namest->name[0] = '/';
+                                       *buf_dirty = 1;
+                               } else  {
+                                       do_warn("would clear entry\n");
+                               }
+                       }
+               }
+       }
+
+       /*
+        * compare top of heap values and reset as required.  if the
+        * holes flag is set, don't reset first_used unless it's
+        * pointing to used bytes.  we're being conservative here
+        * since the block will get compacted anyhow by the kernel.
+        */
+       if (leaf->hdr.holes == 0 && first_used != INT_GET(leaf->hdr.firstused, ARCH_CONVERT) ||
+                       INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > first_used)  {
+               if (!no_modify)  {
+                       if (verbose)
+                               do_warn(
+"- resetting first used heap value from %d to %d in block %u of dir ino %llu\n",
+                                       (int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+                                       da_bno, ino);
+                       INT_SET(leaf->hdr.firstused, ARCH_CONVERT, first_used);
+                       *buf_dirty = 1;
+               } else  {
+                       if (verbose)
+                               do_warn(
+"- would reset first used value from %d to %d in block %u of dir ino %llu\n",
+                                       (int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+                                       da_bno, ino);
+               }
+       }
+
+       if (bytes_used != INT_GET(leaf->hdr.namebytes, ARCH_CONVERT))  {
+               if (!no_modify)  {
+                       if (verbose)
+                               do_warn(
+"- resetting namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+                                       (int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+                                       da_bno, ino);
+                       INT_SET(leaf->hdr.namebytes, ARCH_CONVERT, bytes_used);
+                       *buf_dirty = 1;
+               } else  {
+                       if (verbose)
+                               do_warn(
+"- would reset namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+                                       (int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+                                       da_bno, ino);
+               }
+       }
+
+       /*
+        * If the hole flag is not set, then we know that there can
+        * be no lost holes.  If the hole flag is set, then it's ok
+        * if the on-disk holemap doesn't describe everything as long
+        * as what it does describe doesn't conflict with reality.
+        */
+
+       reset_holes = 0;
+
+       bholemap.lost_holes = leaf->hdr.holes;
+       for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++)  {
+               bholemap.hentries[i].base = INT_GET(leaf->hdr.freemap[i].base, ARCH_CONVERT);
+               bholemap.hentries[i].size = INT_GET(leaf->hdr.freemap[i].size, ARCH_CONVERT);
+       }
+
+       /*
+        * Ok, now set up our own freespace list
+        * (XFS_DIR_LEAF_MAPSIZE (3) * biggest regions)
+        * and see if they match what's in the block
+        */
+       bzero(&holemap, sizeof(da_hole_map_t));
+       process_da_freemap(mp, dir_freemap, &holemap);
+
+       if (zero_len_entries)  {
+               reset_holes = 1;
+       } else if (leaf->hdr.holes == 0)  {
+               if (holemap.lost_holes > 0)  {
+                       if (verbose)
+                               do_warn(
+       "- found unexpected lost holes in block %u, dir inode %llu\n",
+                                       da_bno, ino);
+
+                       reset_holes = 1;
+               } else if (compare_da_freemaps(mp, &holemap, &bholemap,
+                               XFS_DIR_LEAF_MAPSIZE, ino, da_bno))  {
+                       if (verbose)
+                               do_warn(
+                       "- hole info non-optimal in block %u, dir inode %llu\n",
+                                       da_bno, ino);
+                       reset_holes = 1;
+               }
+       } else if (verify_da_freemap(mp, dir_freemap, &holemap, ino, da_bno))  {
+               if (verbose)
+                       do_warn(
+                       "- hole info incorrect in block %u, dir inode %llu\n",
+                               da_bno, ino);
+               reset_holes = 1;
+       }
+
+       if (reset_holes)  {
+               /*
+                * have to reset block hole info
+                */
+               if (verbose)  {
+                       do_warn(
+       "- existing hole info for block %d, dir inode %llu (base, size) - \n",
+                               da_bno, ino);
+                       do_warn("- \t");
+                       for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++)  {
+                               do_warn(
+                               "- (%d, %d) ", bholemap.hentries[i].base,
+                                       bholemap.hentries[i].size);  
+                       }
+                       do_warn("- holes flag = %d\n", bholemap.lost_holes);
+               }
+
+               if (!no_modify)  {
+                       if (verbose)
+                               do_warn(
+               "- compacting block %u in dir inode %llu\n",
+                                       da_bno, ino);
+
+                       new_leaf = (xfs_dir_leafblock_t *) &dirbuf[0];
+
+                       /*
+                        * copy leaf block header
+                        */
+                       bcopy(&leaf->hdr, &new_leaf->hdr,
+                               sizeof(xfs_dir_leaf_hdr_t));
+
+                       /*
+                        * reset count in case we have some zero length entries
+                        * that are being junked
+                        */
+                       num_entries = 0;
+                       first_used = XFS_LBSIZE(mp);
+                       first_byte = (char *) new_leaf
+                                       + (__psint_t) XFS_LBSIZE(mp);
+
+                       /*
+                        * copy entry table and pack names starting from the end
+                        * of the block
+                        */
+                       for (i = 0, s_entry = &leaf->entries[0],
+                                       d_entry = &new_leaf->entries[0];
+                                       i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+                                       i++, s_entry++)  {
+                               /*
+                                * skip zero-length entries
+                                */
+                               if (s_entry->namelen == 0)
+                                       continue;
+
+                               bytes = sizeof(xfs_dir_leaf_name_t)
+                                       + s_entry->namelen - 1;
+
+                               if ((__psint_t) first_byte - bytes <
+                                               sizeof(xfs_dir_leaf_entry_t)
+                                               + (__psint_t) d_entry)  {
+                                       do_warn(
+       "not enough space in block %u of dir inode %llu for all entries\n",
+                                               da_bno, ino);
+                                       break;
+                               }
+
+                               first_used -= bytes;
+                               first_byte -= bytes;
+
+                               INT_SET(d_entry->nameidx, ARCH_CONVERT, first_used);
+                               INT_SET(d_entry->hashval, ARCH_CONVERT, INT_GET(s_entry->hashval, ARCH_CONVERT));
+                               d_entry->namelen = s_entry->namelen;
+                               d_entry->pad2 = 0;
+
+                               bcopy((char *) leaf + INT_GET(s_entry->nameidx, ARCH_CONVERT),
+                                       first_byte, bytes);
+
+                               num_entries++;
+                               d_entry++;
+                       }
+
+                       ASSERT((char *) first_byte >= (char *) d_entry);
+                       ASSERT(first_used <= XFS_LBSIZE(mp));
+
+                       /*
+                        * zero space between end of table and top of heap
+                        */
+                       bzero(d_entry, (__psint_t) first_byte
+                                       - (__psint_t) d_entry);
+
+                       /*
+                        * reset header info
+                        */
+                       if (num_entries != INT_GET(new_leaf->hdr.count, ARCH_CONVERT))
+                               INT_SET(new_leaf->hdr.count, ARCH_CONVERT, num_entries);
+
+                       INT_SET(new_leaf->hdr.firstused, ARCH_CONVERT, first_used);
+                       new_leaf->hdr.holes = 0;
+                       new_leaf->hdr.pad1 = 0;
+
+                       INT_SET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT, (__psint_t) d_entry
+                                                       - (__psint_t) new_leaf);
+                       INT_SET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT, (__psint_t) first_byte
+                                                       - (__psint_t) d_entry);
+
+                       ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < first_used);
+                       ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) ==
+                                       (__psint_t) (&new_leaf->entries[0])
+                                       - (__psint_t) new_leaf
+                                       + i * sizeof(xfs_dir_leaf_entry_t));
+                       ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+                       ASSERT(INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+                       ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) +
+                               INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) == first_used);
+
+                       INT_ZERO(new_leaf->hdr.freemap[1].base, ARCH_CONVERT);
+                       INT_ZERO(new_leaf->hdr.freemap[1].size, ARCH_CONVERT);
+                       INT_ZERO(new_leaf->hdr.freemap[2].base, ARCH_CONVERT);
+                       INT_ZERO(new_leaf->hdr.freemap[2].size, ARCH_CONVERT);
+
+                       /*
+                        * final step, copy block back
+                        */
+                       bcopy(new_leaf, leaf, mp->m_sb.sb_blocksize);
+
+                       *buf_dirty = 1;
+               } else  {
+                       if (verbose)
+                               do_warn(
+                       "- would compact block %u in dir inode %llu\n",
+                                       da_bno, ino);
+               }
+       }
+#if 0
+       if (!no_modify)  {
+               /*
+                * now take care of deleting or marking the entries with
+                * zero-length namelen's
+                */
+               junk_zerolen_dir_leaf_entries(mp, leaf, ino, buf_dirty);
+       }
+#endif
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_leaf_dir_block returns %d\n", res);
+#endif
+       return((res > 0) ? 1 : 0);
+}
+
+/*
+ * walks the chain of leaf blocks; returns 0 if the directory is ok, 1 if it has to be junked.
+ */
+int
+process_leaf_dir_level(xfs_mount_t     *mp,
+                       da_bt_cursor_t  *da_cursor,     /* level[0].bno is the first leaf to visit */
+                       int             ino_discovery,
+                       int             *repair,        /* out - 1 if a leaf block was written back */
+                       int             *dot,
+                       int             *dotdot,
+                       xfs_ino_t       *parent)
+{
+       xfs_dir_leafblock_t     *leaf;
+       xfs_buf_t               *bp;
+       xfs_ino_t               ino;
+       xfs_dfsbno_t            dev_bno;
+       xfs_dablk_t             da_bno;
+       xfs_dablk_t             prev_bno;
+       int                     res = 0;        /* never changed below */
+       int                     buf_dirty = 0;
+       xfs_daddr_t             bd_addr;        /* only used in the read-failure warning */
+       xfs_dahash_t            current_hashval = 0;
+       xfs_dahash_t            greatest_hashval;
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_leaf_dir_level - ino %llu\n", da_cursor->ino);
+#endif
+       *repair = 0;
+       da_bno = da_cursor->level[0].bno;
+       ino = da_cursor->ino;
+       prev_bno = 0;   /* the first leaf visited must have back == 0 */
+
+       do {
+               dev_bno = blkmap_get(da_cursor->blkmap, da_bno);
+               /*
+                * directory code uses 0 as the NULL block pointer
+                * since 0 is the root block and no directory block
+                * pointer can point to the root block of the btree
+                */
+               ASSERT(da_bno != 0);
+
+               if (dev_bno == NULLDFSBNO) {
+                       do_warn("can't map block %u for directory inode %llu\n",
+                               da_bno, ino);
+                       goto error_out;
+               }
+
+               bd_addr = (xfs_daddr_t)XFS_FSB_TO_DADDR(mp, dev_bno);
+
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
+                                       XFS_FSB_TO_BB(mp, 1), 0);
+               if (!bp) {
+                       do_warn("can't read file block %u (fsbno %llu, daddr %lld) "
+                               "for directory inode %llu\n",
+                               da_bno, dev_bno, (__int64_t) bd_addr, ino);
+                       goto error_out;
+               }
+
+               leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+               /*
+                * check magic number for leaf directory btree block
+                */
+               if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+                       do_warn("bad directory leaf magic # %#x for dir ino %llu\n",
+                               INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+               /*
+                * keep track of greatest block # -- that gets
+                * us the length of the directory
+                */
+               if (da_bno > da_cursor->greatest_bno)
+                       da_cursor->greatest_bno = da_bno;
+
+               buf_dirty = 0;
+               /*
+                * for each block, process the block, verify its path,
+                * then get next block.  update cursor values along the way
+                */
+               if (process_leaf_dir_block(mp, leaf, da_bno, ino,
+                               current_hashval, ino_discovery,
+                               da_cursor->blkmap, dot, dotdot, parent,
+                               &buf_dirty, &greatest_hashval))  {
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               /*
+                * index can be set to hdr.count so match the
+                * indexes of the interior blocks -- which at the
+                * end of the block will point to 1 after the final
+                * real entry in the block
+                */
+               da_cursor->level[0].hashval = greatest_hashval;
+               da_cursor->level[0].bp = bp;
+               da_cursor->level[0].bno = da_bno;
+               da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               da_cursor->level[0].dirty = buf_dirty;
+
+               if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno)  {
+                       do_warn("bad sibling back pointer for directory block %u "
+                               "in directory inode %llu\n", da_bno, ino);
+                       libxfs_putbuf(bp);
+                       goto error_out;
+               }
+
+               prev_bno = da_bno;      /* the next block's back pointer must name this block */
+               da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);    /* 0 ends the chain */
+
+               if (da_bno != 0)
+                       if (verify_da_path(mp, da_cursor, 0))  {
+                               libxfs_putbuf(bp);
+                               goto error_out;
+                       }
+
+               current_hashval = greatest_hashval;
+
+               ASSERT(buf_dirty == 0 || buf_dirty && !no_modify);
+
+               if (buf_dirty && !no_modify)  {
+                       *repair = 1;
+                       libxfs_writebuf(bp, 0);
+               }
+               else
+                       libxfs_putbuf(bp);
+       } while (da_bno != 0 && res == 0);
+
+       if (verify_final_da_path(mp, da_cursor, 0))  {
+               /*
+                * verify the final path up (right-hand-side) if still ok
+                */
+               do_warn("bad hash path in directory %llu\n", da_cursor->ino);
+               goto error_out;
+       }
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_leaf_dir_level returns %d (%s)\n",
+               res, ((res) ? "bad" : "ok"));
+#endif
+       /*
+        * redundant but just for testing
+        */
+       release_da_cursor(mp, da_cursor, 0);
+
+       return(res);
+
+error_out:
+       /*
+        * release all buffers holding interior btree blocks
+        */
+       err_release_da_cursor(mp, da_cursor, 0);
+
+       return(1);
+}
+
+/*
+ * a node directory is a true btree directory -- where the directory
+ * has gotten big enough that it is represented as a non-trivial (e.g.
+ * has more than just a root block) btree.
+ *
+ * Note that if we run into any problems, we trash the
+ * directory.  Even if it's the root directory,
+ * we'll be able to traverse all the disconnected
+ * subtrees later (phase 6).
+ *
+ * one day, if we actually fix things, we'll set repair to 1 to
+ * indicate that we have or that we should.
+ *
+ * dirname can be set to NULL if the name is unknown (or to
+ * the string representation of the inode)
+ *
+ * returns 0 if things are ok, 1 if bad (directory needs to be junked)
+ */
+/* ARGSUSED */
+int
+process_node_dir(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       blkmap_t        *blkmap,
+       int             *dot,
+       int             *dotdot,
+       xfs_ino_t       *parent,        /* out - parent ino #  or NULLFSINO */
+       char            *dirname,
+       int             *repair)
+{
+       xfs_dablk_t                     bno;
+       int                             error = 0;
+       da_bt_cursor_t                  da_cursor;
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_node_dir - ino %llu\n", ino);
+#endif
+       *repair = *dot = *dotdot = 0;
+       *parent = NULLFSINO;
+
+       /*
+        * try again -- traverse down left-side of tree until we hit
+        * the left-most leaf block setting up the btree cursor along
+        * the way.  Then walk the leaf blocks left-to-right, calling
+        * a parent-verification routine each time we traverse a block.
+        */
+       bzero(&da_cursor, sizeof(da_bt_cursor_t));
+
+       da_cursor.active = 0;
+       da_cursor.type = 0;
+       da_cursor.ino = ino;
+       da_cursor.dip = dip;
+       da_cursor.greatest_bno = 0;
+       da_cursor.blkmap = blkmap;
+
+       /*
+        * now process interior node
+        */
+
+       error = traverse_int_dablock(mp, &da_cursor, &bno, XFS_DATA_FORK);
+
+       if (error == 0)
+               return(1);
+
+       /*
+        * now pass cursor and bno into leaf-block processing routine
+        * the leaf dir level routine checks the interior paths
+        * up to the root including the final right-most path.
+        */
+
+       error = process_leaf_dir_level(mp, &da_cursor, ino_discovery,
+                                       repair, dot, dotdot, parent);
+
+       if (error)
+               return(1);
+
+       /*
+        * sanity check inode size
+        */
+       if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <
+                       (da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize)  {
+               if ((xfs_fsize_t) (da_cursor.greatest_bno
+                               * mp->m_sb.sb_blocksize) > UINT_MAX)  { 
+                       do_warn(
+"out of range internal directory block numbers (inode %llu)\n",
+                               ino);
+                       return(1);
+               }
+
+               do_warn(
+"setting directory inode (%llu) size to %llu bytes, was %lld bytes\n",
+                       ino,
+                       (xfs_dfiloff_t) (da_cursor.greatest_bno + 1)
+                               * mp->m_sb.sb_blocksize,
+                       INT_GET(dip->di_core.di_size, ARCH_CONVERT));
+
+               INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)
+                       (da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize);
+       }
+       return(0);
+}
+
+/*
+ * a leaf directory is one where the directory is too big for
+ * the inode data fork but is small enough to fit into one
+ * directory btree block (filesystem block) outside the inode
+ *
+ * *parent starts out as NULLFSINO and is handed, with dot and
+ * dotdot, to process_leaf_dir_block() which examines block 0.
+ * NOTE(review): *repair is cleared here but never set to 1.
+ *
+ * dirname can be set to NULL if the name is unknown (or to
+ * the string representation of the inode)
+ *
+ * returns 0 if things are ok, 1 if bad (directory needs to be junked)
+ */
+/* ARGSUSED */
+int
+process_leaf_dir(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,    /* not referenced in this function */
+       blkmap_t        *blkmap,
+       int             *dot,           /* out - 1 if there is a dot, else 0 */
+       int             *dotdot,        /* out - 1 if there's a dotdot, else 0 */
+       xfs_ino_t       *parent,        /* out - parent ino #  or NULLFSINO */
+       char            *dirname,       /* in - directory pathname */
+       int             *repair)        /* out - 1 if something was fixed */
+{
+       xfs_dir_leafblock_t     *leaf;
+       xfs_dahash_t    next_hashval;
+       xfs_dfsbno_t    bno;
+       xfs_buf_t       *bp;
+       int             buf_dirty = 0;
+
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "process_leaf_dir - ino %llu\n", ino);
+#endif
+       *repair = *dot = *dotdot = 0;
+       *parent = NULLFSINO;
+
+       bno = blkmap_get(blkmap, 0);
+       if (bno == NULLDFSBNO) {
+               do_warn("block 0 for directory inode %llu is missing\n", ino);
+               return(1);
+       }
+       bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+                       XFS_FSB_TO_BB(mp, 1), 0);
+       if (!bp) {
+               do_warn("can't read block 0 for directory inode %llu\n", ino);
+               return(1);
+       }
+       /*
+        * verify leaf block
+        */
+       leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+       /*
+        * check magic number for leaf directory btree block
+        */
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
+               do_warn("bad directory leaf magic # %#x for dir ino %llu\n",
+                       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+               libxfs_putbuf(bp);
+               return(1);
+       }
+
+       if (process_leaf_dir_block(mp, leaf, 0, ino, 0, ino_discovery, blkmap,
+                       dot, dotdot, parent, &buf_dirty, &next_hashval)) {
+               /*
+                * the block is bad.  lose the directory.
+                * XXX - later, we should try and just lose
+                * the block without losing the entire directory
+                */
+               ASSERT(*dotdot == 0 || *dotdot == 1 && *parent != NULLFSINO);
+               libxfs_putbuf(bp);
+               return(1);
+       }
+
+       /*
+        * check sibling pointers in leaf block (above doesn't do it)
+        */
+       if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0 ||
+                               INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0)  {
+               if (!no_modify)  {
+                       do_warn("clearing forw/back pointers for directory inode "
+                               "%llu\n", ino);
+                       buf_dirty = 1;
+                       INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT);
+                       INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT);
+               } else  {
+                       do_warn("would clear forw/back pointers for directory inode "
+                               "%llu\n", ino);
+               }
+       }
+
+       ASSERT(buf_dirty == 0 || buf_dirty && !no_modify);
+
+       if (buf_dirty && !no_modify)    /* only a changed block is written back */
+               libxfs_writebuf(bp, 0);
+       else
+               libxfs_putbuf(bp);
+
+       return(0);
+}
+
+/*
+ * returns 1 if things are bad (directory needs to be junked)
+ * and 0 if things are ok.  If ino_discovery is 1, add unknown
+ * inodes to uncertain inode list.
+ */
+int
+process_dir(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,
+       char            *dirname,
+       xfs_ino_t       *parent,
+       blkmap_t        *blkmap)
+{
+       int             dot;
+       int             dotdot;
+       int             repair = 0;
+       int             res = 0;
+
+       *parent = NULLFSINO;
+       dot = dotdot = 0;
+
+       /*
+        * branch off depending on the type of inode.  This routine
+        * is only called ONCE so all the subordinate routines will
+        * fix '.' and junk '..' if they're bogus.
+        */
+       if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT))  {
+               dot = 1;
+               dotdot = 1;
+               if (process_shortform_dir(mp, ino, dip, ino_discovery,
+                               dino_dirty, parent, dirname, &repair))  {
+                       res = 1;
+               }
+       } else if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp))  {
+               if (process_leaf_dir(mp, ino, dip, ino_discovery,
+                               dino_dirty, blkmap, &dot, &dotdot,
+                               parent, dirname, &repair))  {
+                       res = 1;
+               }
+       } else  {
+               if (process_node_dir(mp, ino, dip, ino_discovery,
+                               blkmap, &dot, &dotdot,
+                               parent, dirname, &repair))  {
+                       res = 1;
+               }
+       }
+       /*
+        * bad . entries in all directories will be fixed up in phase 6
+        */
+       if (dot == 0) {
+               do_warn("no . entry for directory %llu\n", ino);
+       }
+
+       /*
+        * shortform dirs always have a .. entry.  .. for all longform
+        * directories will get fixed in phase 6. .. for other shortform
+        * dirs also get fixed there.  .. for a shortform root was
+        * fixed in place since we know what it should be
+        */
+       if (dotdot == 0 && ino != mp->m_sb.sb_rootino) {
+               do_warn("no .. entry for directory %llu\n", ino);
+       } else if (dotdot == 0 && ino == mp->m_sb.sb_rootino) {
+               do_warn("no .. entry for root directory %llu\n", ino);
+               need_root_dotdot = 1;
+       }
+       
+#ifdef XR_DIR_TRACE
+       fprintf(stderr, "(process_dir), parent of %llu is %llu\n", ino, parent);
+#endif
+       return(res);
+}
diff --git a/repair/dir.h b/repair/dir.h
new file mode 100644 (file)
index 0000000..9d2b069
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_DIR_H
+#define _XR_DIR_H
+
+struct blkmap;
+
+/* 1 bit per byte, max XFS blocksize == 64K bits / NBBY */
+#define DA_BMAP_SIZE           8192
+
+typedef unsigned char  da_freemap_t;
+
+/*
+ * the cursor gets passed up and down the da btree processing
+ * routines.  The interior block processing routines use the
+ * cursor to determine if the pointers to and from the preceding
+ * and succeeding sibling blocks are ok and whether the values in
+ * the current block are consistent with the entries in the parent
+ * nodes.  When a block is traversed, a parent-verification routine
+ * is called to verify if the next logical entry in the next level up
+ * is consistent with the greatest hashval in the next block of the
+ * current level.  The verification routine is itself recursive and
+ * calls itself if it has to traverse an interior block to get
+ * the next logical entry.  The routine recurses upwards through
+ * the tree until it finds a block where it can simply step to
+ * the next entry.  The hashval in that entry should be equal to
+ * the hashval being passed to it (the greatest hashval in the block
+ * that the entry points to).  If that isn't true, then the tree
+ * is blown and we need to trash it, salvage and trash it, or fix it.
+ * Currently, we just trash it.
+ */
+typedef struct da_level_state  {       /* cursor state for one btree level */
+       xfs_buf_t       *bp;            /* buffer holding this level's current block */
+#ifdef XR_DIR_TRACE
+       xfs_da_intnode_t *n;            /* bp data (member exists in trace builds only) */
+#endif
+       xfs_dablk_t     bno;            /* file block number */
+       xfs_dahash_t    hashval;        /* last verified hashval */
+       int             index;          /* current index in block */
+       int             dirty;          /* is buffer dirty ? (1 == yes) */
+} da_level_state_t;
+
+typedef struct da_bt_cursor  {
+       int                     active; /* highest level in tree (# levels-1) */
+       int                     type;   /* 0 if dir, 1 if attr */
+       xfs_ino_t               ino;    /* inode number of the inode being processed */
+       xfs_dablk_t             greatest_bno;   /* largest block # seen while walking */
+       xfs_dinode_t            *dip;   /* inode structure the caller passed in */
+       da_level_state_t        level[XFS_DA_NODE_MAXDEPTH];    /* dir.c uses level[0] for leaves */
+       struct blkmap           *blkmap;        /* passed to blkmap_get() to map block #s */
+} da_bt_cursor_t;
+
+
+/* ROUTINES */
+
+void
+err_release_da_cursor(         /* dir.c calls this on error paths to release cursor buffers */
+       xfs_mount_t     *mp,
+       da_bt_cursor_t  *cursor,
+       int             prev_level);
+
+xfs_dfsbno_t
+get_first_dblock_fsbno(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dino);
+
+void
+init_da_freemap(
+       da_freemap_t *dir_freemap);
+
+int
+namecheck(
+       char            *name, 
+       int             length);
+
+int
+process_shortform_dir(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,    /* is dinode buffer dirty? */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry doesn't exist */
+       char            *dirname,       /* directory pathname */
+       int             *repair);       /* out - 1 if dir was fixed up */
+
+int
+process_dir(                   /* returns 1 if the directory must be junked, 0 if ok */
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dirty,         /* named dino_dirty in the definition in dir.c */
+       char            *dirname,
+       xfs_ino_t       *parent,
+       struct blkmap   *blkmap);
+
+void
+release_da_cursor(             /* dir.c calls this after a successful leaf-level walk */
+       xfs_mount_t     *mp,
+       da_bt_cursor_t  *cursor,
+       int             prev_level);
+
+int
+set_da_freemap(
+       xfs_mount_t *mp, da_freemap_t *map,
+       int start, int stop);
+
+int
+traverse_int_dablock(          /* process_node_dir treats a 0 return as failure */
+       xfs_mount_t     *mp,
+       da_bt_cursor_t          *da_cursor,
+       xfs_dablk_t             *rbno,          /* out; presumably the leftmost leaf block # - TODO confirm */
+       int                     whichfork);
+
+int
+verify_da_path(                        /* callers in dir.c treat a nonzero return as a bad path */
+       xfs_mount_t     *mp,
+       da_bt_cursor_t          *cursor,
+       const int               p_level);
+
+int
+verify_final_da_path(          /* callers in dir.c treat a nonzero return as a bad path */
+       xfs_mount_t     *mp,
+       da_bt_cursor_t          *cursor,
+       const int               p_level);
+
+
+#endif /* _XR_DIR_H */
diff --git a/repair/dir2.c b/repair/dir2.c
new file mode 100644 (file)
index 0000000..e2675df
--- /dev/null
@@ -0,0 +1,2070 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "dir2.h"
+#include "bmap.h"
+
+/*
+ * Tag bad directory entries with this.
+ * We can't tag them with -1 since that will look like a 
+ * data_unused_t instead of a data_entry_t.
+ */
+#define        BADFSINO        ((xfs_ino_t)0xfeffffffffffffffULL)
+
+/*
+ * Known bad inode list.  These are seen when the leaf and node
+ * block linkages are incorrect.
+ */
+typedef struct dir2_bad {
+       xfs_ino_t       ino;            /* inode number recorded as bad */
+       struct dir2_bad *next;          /* next record, NULL at the end of the list */
+} dir2_bad_t;
+dir2_bad_t *dir2_bad_list;     /* list head; dir2_add_badlist() pushes new records here */
+
+void
+dir2_add_badlist(              /* pushes a record for ino onto the head of dir2_bad_list */
+       xfs_ino_t       ino)
+{
+       dir2_bad_t      *l;
+
+       if ((l = malloc(sizeof(dir2_bad_t))) == NULL) {
+               do_error("malloc failed (%u bytes) dir2_add_badlist:ino %llu\n",
+                       (unsigned int) sizeof(dir2_bad_t), ino);        /* size_t converted for %u */
+               exit(1);
+       }
+       l->next = dir2_bad_list;
+       dir2_bad_list = l;
+       l->ino = ino;
+}
+
+int
+dir2_is_badino(                        /* returns 1 if ino is on dir2_bad_list, else 0 */
+       xfs_ino_t       ino)
+{
+       dir2_bad_t      *entry = dir2_bad_list;
+
+       /* stop at the first matching record or at the end of the list */
+       while (entry != NULL && entry->ino != ino)
+               entry = entry->next;
+       return entry != NULL;
+}
+
+/*
+ * Multibuffer handling.  V2 directory blocks can be noncontiguous, needing
+ * multiple buffers: read the nex extents listed in bmp[] into one xfs_dabuf_t.
+ */
+xfs_dabuf_t *                  /* NULL if any extent could not be read */
+da_read_buf(
+       xfs_mount_t     *mp,
+       int             nex,            /* number of extents in bmp[] */
+       bmap_ext_t      *bmp)
+{
+       xfs_buf_t       *bp;
+       xfs_buf_t       **bplist;       /* scratch list, freed before returning */
+       xfs_dabuf_t     *dabuf;
+       int             i;
+       int             off;
+
+       bplist = calloc(nex, sizeof(*bplist));
+       if (bplist == NULL) {
+               do_error("couldn't malloc dir2 buffer list\n");
+               exit(1);
+       }
+       for (i = 0; i < nex; i++) {
+               bplist[i] = libxfs_readbuf(mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
+                               XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
+               if (!bplist[i])
+                       goto failed;    /* i is the index of the failed read */
+       }
+       dabuf = malloc(XFS_DA_BUF_SIZE(nex));
+       if (dabuf == NULL) {
+               do_error("couldn't malloc dir2 buffer header\n");
+               exit(1);
+       }
+       dabuf->dirty = 0;
+       dabuf->nbuf = nex;
+       if (nex == 1) {
+               dabuf->bps[0] = bp = bplist[0]; /* one buffer: data points into it */
+               dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
+               dabuf->data = XFS_BUF_PTR(bp);
+       } else {
+               for (i = 0, dabuf->bbcount = 0; i < nex; i++) {
+                       dabuf->bps[i] = bp = bplist[i];
+                       dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
+               }
+               dabuf->data = malloc(BBTOB(dabuf->bbcount));    /* contiguous copy */
+               if (dabuf->data == NULL) {
+                       do_error("couldn't malloc dir2 buffer data\n");
+                       exit(1);
+               }
+               for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) {
+                       bp = bplist[i];
+                       bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+                               XFS_BUF_COUNT(bp));
+               }
+       }
+       free(bplist);           /* dabuf->bps[] holds the pointers now */
+       return dabuf;
+failed:
+       while (--i >= 0)        /* release only the buffers that were read */
+               libxfs_putbuf(bplist[i]);
+       free(bplist);
+       return NULL;
+}
+
+static void
+da_buf_clean(
+       xfs_dabuf_t     *dabuf)
+{
+       xfs_buf_t       *cur;
+       char            *src = (char *)dabuf->data;
+       int             n;
+
+       if (!dabuf->dirty)
+               return;
+       dabuf->dirty = 0;
+       /* copy dabuf->data back into each buffer in bps[], in order */
+       for (n = 0; n < dabuf->nbuf; n++, src += XFS_BUF_COUNT(cur)) {
+               cur = dabuf->bps[n];
+               bcopy(src, XFS_BUF_PTR(cur), XFS_BUF_COUNT(cur));
+       }
+}
+
+static void
+da_buf_done(
+       xfs_dabuf_t     *dabuf)
+{
+       da_buf_clean(dabuf);            /* copy back pending changes before freeing */
+       /* data is freed separately only when there is more than one buffer */
+       free(dabuf->nbuf > 1 ? dabuf->data : NULL);
+       free(dabuf);
+}
+
+int
+da_bwrite(
+       xfs_mount_t     *mp,            /* not referenced in this function */
+       xfs_dabuf_t     *dabuf)
+{
+       xfs_buf_t       *single;
+       xfs_buf_t       **bufs = &single;
+       int             nbuf = dabuf->nbuf;
+       int             rc = 0;
+       int             err;
+       int             n;
+
+       /* save the buffer pointers first: da_buf_done() frees dabuf below */
+       if (nbuf == 1) {
+               single = dabuf->bps[0];
+       } else {
+               if ((bufs = malloc(nbuf * sizeof(*bufs))) == NULL) {
+                       do_error("couldn't malloc dir2 buffer list\n");
+                       exit(1);
+               }
+               bcopy(dabuf->bps, bufs, nbuf * sizeof(*bufs));
+       }
+       da_buf_done(dabuf);
+       /* every buffer is written; the last nonzero status is returned */
+       for (n = 0; n < nbuf; n++) {
+               if ((err = libxfs_writebuf(bufs[n], 0)) != 0)
+                       rc = err;
+       }
+       /* bufs was malloc'ed only in the multi-buffer case */
+       if (nbuf != 1)
+               free(bufs);
+       return rc;
+}
+
+void
+da_brelse(
+       xfs_dabuf_t     *dabuf)
+{
+       xfs_buf_t       *single;
+       xfs_buf_t       **bufs = &single;
+       int             nbuf = dabuf->nbuf;
+       int             n;
+
+       /* save the buffer pointers first: da_buf_done() frees dabuf below */
+       if (nbuf == 1) {
+               single = dabuf->bps[0];
+       } else {
+               if ((bufs = malloc(nbuf * sizeof(*bufs))) == NULL) {
+                       do_error("couldn't malloc dir2 buffer list\n");
+                       exit(1);
+               }
+               bcopy(dabuf->bps, bufs, nbuf * sizeof(*bufs));
+       }
+       da_buf_done(dabuf);
+       /* hand each underlying buffer back through libxfs_putbuf() */
+       for (n = 0; n < nbuf; n++)
+               libxfs_putbuf(bufs[n]);
+       if (nbuf != 1)
+               free(bufs);
+}
+
+/*
+ * walk tree from root to the left-most leaf block reading in
+ * blocks and setting up cursor.  passes back file block number of the
+ * left-most leaf block if successful (*rbno).  returns 1 if successful,
+ * 0 if unsuccessful.
+ *
+ * if the very first block read is a LEAFN block, *rbno is set to 0,
+ * the block is released and 1 is returned with da_cursor->active == 0.
+ *
+ * on success the buffers for levels 1 .. da_cursor->active are left
+ * held in the cursor.  on failure the buffers stored in the cursor
+ * so far are released before returning.
+ *
+ * the tree depth comes from the on-disk header of the first node, so
+ * it is range-checked before being used to index da_cursor->level[].
+ */
+int
+traverse_int_dir2block(xfs_mount_t     *mp,
+               dir2_bt_cursor_t        *da_cursor,
+               xfs_dablk_t             *rbno)
+{
+       bmap_ext_t              *bmp;
+       xfs_dablk_t             bno;
+       xfs_dabuf_t             *bp;
+       int                     i;
+       int                     nex;
+       xfs_da_intnode_t        *node;
+
+       /*
+        * traverse down left-side of tree until we hit the
+        * left-most leaf block setting up the btree cursor along
+        * the way.
+        */
+       bno = mp->m_dirleafblk;
+       i = -1;
+       node = NULL;
+       da_cursor->active = 0;
+
+       do {
+               /*
+                * read in each block along the way and set up cursor
+                */
+               nex = blkmap_getn(da_cursor->blkmap, bno, mp->m_dirblkfsbs,
+                       &bmp);
+
+               if (nex == 0)
+                       goto error_out;
+
+               bp = da_read_buf(mp, nex, bmp);
+               free(bmp);
+               if (bp == NULL) {
+                       do_warn("can't read block %u for directory inode "
+                               "%llu\n",
+                               bno, da_cursor->ino);
+                       goto error_out;
+               }
+
+               node = bp->data;
+
+               if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) ==
+                                       XFS_DIR2_LEAFN_MAGIC)  {
+                       if ( i != -1 ) {
+                               do_warn("found non-root LEAFN node in inode "
+                                       "%llu bno = %u\n",
+                                       da_cursor->ino, bno);
+                       }
+                       if (INT_GET(node->hdr.level, ARCH_CONVERT) >= 1) {
+                               do_warn("LEAFN node level is %d inode %llu "
+                                       "bno = %u\n",
+                                       INT_GET(node->hdr.level, ARCH_CONVERT),
+                                               da_cursor->ino, bno);
+                       }
+                       *rbno = 0;
+                       da_brelse(bp);
+                       return(1);
+               } else if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) !=
+                                       XFS_DA_NODE_MAGIC)  {
+                       da_brelse(bp);
+                       do_warn("bad dir magic number 0x%x in inode %llu "
+                               "bno = %u\n",
+                               INT_GET(node->hdr.info.magic, ARCH_CONVERT),
+                                       da_cursor->ino, bno);
+                       goto error_out;
+               }
+               if (INT_GET(node->hdr.count, ARCH_CONVERT) >
+                                       XFS_DA_NODE_ENTRIES(mp))  {
+                       da_brelse(bp);
+                       do_warn("bad record count in inode %llu, count = %d, "
+                               "max = %d\n", da_cursor->ino,
+                               INT_GET(node->hdr.count, ARCH_CONVERT),
+                               XFS_DA_NODE_ENTRIES(mp));
+                       goto error_out;
+               }
+
+               /*
+                * maintain level counter
+                */
+               if (i == -1)  {
+                       i = da_cursor->active =
+                               INT_GET(node->hdr.level, ARCH_CONVERT);
+                       /*
+                        * the root's level is untrusted on-disk data and
+                        * is about to index da_cursor->level[].  an
+                        * interior node must be at level >= 1, and the
+                        * level must fit the cursor.  nothing has been
+                        * stored in the cursor yet, so reset it and make
+                        * sure error_out releases nothing.
+                        */
+                       if (i < 1 || i >= XFS_DA_NODE_MAXDEPTH)  {
+                               do_warn("bad header depth for directory "
+                                       "inode %llu\n",
+                                       da_cursor->ino);
+                               da_brelse(bp);
+                               da_cursor->active = 0;
+                               i = -1;
+                               goto error_out;
+                       }
+               } else  {
+                       if (INT_GET(node->hdr.level, ARCH_CONVERT) == i - 1)  {
+                               i--;
+                       } else  {
+                               do_warn("bad directory btree for directory "
+                                       "inode %llu\n",
+                                       da_cursor->ino);
+                               da_brelse(bp);
+                               goto error_out;
+                       }
+               }
+
+               da_cursor->level[i].hashval =
+                       INT_GET(node->btree[0].hashval, ARCH_CONVERT);
+               da_cursor->level[i].bp = bp;
+               da_cursor->level[i].bno = bno;
+               da_cursor->level[i].index = 0;
+
+               /*
+                * set up new bno for next level down
+                */
+               bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+       } while (node != NULL && i > 1);
+
+       /*
+        * now return block number and get out
+        */
+       *rbno = da_cursor->level[0].bno = bno;
+       return(1);
+
+error_out:
+       /*
+        * i is the lowest level whose buffer was stored in the cursor
+        * (or -1 if none was); release from there up to the root.
+        */
+       while (i > 1 && i <= da_cursor->active)  {
+               da_brelse(da_cursor->level[i].bp);
+               i++;
+       }
+
+       return(0);
+}
+
+/*
+ * release the buffers held in the cursor for every level from
+ * prev_level + 1 up to and including cursor->active.
+ *
+ * error == 0 means the caller expects no buffer to still be held, so
+ * finding one is reported with a warning (and trips the ASSERT).
+ * error != 0 means we are on an error-handling path where leftover
+ * buffers are legitimate.  either way any buffer found is released
+ * and its cursor slot cleared.  mp is not used.
+ */
+void
+release_dir2_cursor_int(xfs_mount_t            *mp,
+                       dir2_bt_cursor_t        *cursor,
+                       int                     prev_level,
+                       int                     error)
+{
+       int     lvl;
+
+       for (lvl = prev_level + 1; ; lvl++)  {
+               if (cursor->level[lvl].bp != NULL)  {
+                       if (error == 0)  {
+                               do_warn("release_dir2_cursor_int got unexpected "
+                                       "non-null bp, dabno = %u\n",
+                                       cursor->level[lvl].bno);
+                       }
+                       ASSERT(error != 0);
+
+                       da_brelse(cursor->level[lvl].bp);
+                       cursor->level[lvl].bp = NULL;
+               }
+
+               /* the first level is always visited, even if above active */
+               if (lvl >= cursor->active)
+                       break;
+       }
+}
+
+/*
+ * release cursor buffers above prev_level on the normal path, where
+ * no buffer is expected to still be held (error flag clear).
+ */
+void
+release_dir2_cursor(xfs_mount_t                *mp,
+               dir2_bt_cursor_t        *cursor,
+               int                     prev_level)
+{
+       const int       in_error = 0;
+
+       release_dir2_cursor_int(mp, cursor, prev_level, in_error);
+}
+
+/*
+ * release cursor buffers above prev_level on an error path, where
+ * buffers may legitimately still be held (error flag set).
+ */
+void
+err_release_dir2_cursor(xfs_mount_t            *mp,
+                       dir2_bt_cursor_t        *cursor,
+                       int                     prev_level)
+{
+       const int       in_error = 1;
+
+       release_dir2_cursor_int(mp, cursor, prev_level, in_error);
+}
+
+/*
+ * make sure that all entries in all blocks along the right side
+ * of the tree are used and hashval's are consistent.  p_level is the
+ * level of the descendent block.  returns 0 if good (even if it had
+ * to be fixed up), and 1 if bad.  The right edge of the tree is
+ * technically a block boundary.  This routine should be used then
+ * instead of verify_dir2_path().
+ *
+ * on the good path the buffer for each verified level is written
+ * (if dirtied and modifications are allowed) or released, and its
+ * cursor slot is cleared.  when 1 is returned the buffer for the
+ * failing level is still held in the cursor.
+ */
+int
+verify_final_dir2_path(xfs_mount_t     *mp,
+               dir2_bt_cursor_t        *cursor,
+               const int               p_level)
+{
+       xfs_da_intnode_t        *node;
+       int                     at_root;
+       int                     bad = 0;
+       int                     entry;
+       int                     this_level = p_level + 1;
+
+       /*
+        * the index should point to the next "unprocessed" entry
+        * in the block which should be the final (rightmost) entry
+        */
+       entry = cursor->level[this_level].index;
+       node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->data);
+       /*
+        * check internal block consistency on this level -- ensure
+        * that all entries are used, encountered and expected hashvals
+        * match, etc.
+        */
+       if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1)  {
+               do_warn("directory block used/count inconsistency - %d / %hu\n",
+                       entry, INT_GET(node->hdr.count, ARCH_CONVERT));
+               bad++;
+       }
+       /*
+        * hash values monotonically increasing ???
+        */
+       if (cursor->level[this_level].hashval >= INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+               do_warn("directory/attribute block hashvalue inconsistency, "
+                       "expected > %u / saw %u\n",
+                       cursor->level[this_level].hashval,
+                       INT_GET(node->btree[entry].hashval, ARCH_CONVERT));
+               bad++;
+       }
+       if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0)  {
+               do_warn("bad directory/attribute forward block pointer, "
+                       "expected 0, saw %u\n",
+                       INT_GET(node->hdr.info.forw, ARCH_CONVERT));
+               bad++;
+       }
+       if (bad)  {
+               do_warn("bad directory block in inode %llu\n", cursor->ino);
+               return(1);
+       }
+       /*
+        * keep track of greatest block # -- that gets
+        * us the length of the directory
+        */
+       if (cursor->level[this_level].bno > cursor->greatest_bno)
+               cursor->greatest_bno = cursor->level[this_level].bno;
+
+       /*
+        * ok, now check descendant block number against this level
+        */
+       if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+               return(1);
+       }
+
+       if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+               if (!no_modify)  {
+                       do_warn("correcting bad hashval in non-leaf dir "
+                               "block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+                       INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval);
+                       cursor->level[this_level].dirty++;
+               } else  {
+                       do_warn("would correct bad hashval in non-leaf dir "
+                               "block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+               }
+       }
+
+       /*
+        * unless this is the root block, set hashvalue to correctly
+        * reflect the now-validated last entry in this block.  this
+        * has to happen before the buffer is given up below: node
+        * points at the dabuf's data, and da_bwrite()/da_brelse()
+        * free the dabuf, so node must not be touched afterwards.
+        */
+       at_root = (this_level >= cursor->active);
+       if (!at_root)
+               cursor->level[this_level].hashval =
+                       INT_GET(node->btree[entry].hashval, ARCH_CONVERT);
+
+       /*
+        * release/write buffer
+        */
+       ASSERT(cursor->level[this_level].dirty == 0 ||
+               (cursor->level[this_level].dirty && !no_modify));
+
+       if (cursor->level[this_level].dirty && !no_modify)
+               da_bwrite(mp, cursor->level[this_level].bp);
+       else
+               da_brelse(cursor->level[this_level].bp);
+
+       cursor->level[this_level].bp = NULL;
+
+       /*
+        * bail out if this is the root block (top of tree)
+        */
+       if (at_root)  {
+               return(0);
+       }
+
+       /*
+        * continue upwards validation
+        */
+       return(verify_final_dir2_path(mp, cursor, this_level));
+}
+
+/*
+ * Verifies the path from a descendant block up to the root.
+ * Should be called when the descendant level traversal hits
+ * a block boundary before crossing the boundary (reading in a new
+ * block).
+ *
+ * the directory/attr btrees work differently to the other fs btrees.
+ * each interior block contains records that are <hashval, bno>
+ * pairs.  The bno is a file bno, not a filesystem bno.  The last
+ * hashvalue in the block <bno> will be <hashval>.  BUT unlike
+ * the freespace btrees, the *last* value in each block gets
+ * propagated up the tree instead of the first value in each block.
+ * that is, the interior records point to child blocks and the *greatest*
+ * hash value contained by the child block is the one the block above
+ * uses as the key for the child block.
+ *
+ * level is the level of the descendent block.  returns 0 if good,
+ * and 1 if bad.  The descendant block may be a leaf block.
+ *
+ * the invariant here is that the values in the cursor for the
+ * levels beneath this level (this_level) and the cursor index
+ * for this level *must* be valid.
+ *
+ * that is, the hashval/bno info is accurate for all
+ * DESCENDANTS and match what the node[index] information
+ * for the current index in the cursor for this level.
+ *
+ * the index values in the cursor for the descendant level
+ * are allowed to be off by one as they will reflect the
+ * next entry at those levels to be processed.
+ *
+ * the hashvalue for the current level can't be set until
+ * we hit the last entry in the block so, it's garbage
+ * until set by this routine.
+ *
+ * bno and bp for the current block/level are always valid
+ * since they have to be set so we can get a buffer for the
+ * block.
+ */
+int
+verify_dir2_path(xfs_mount_t   *mp,
+       dir2_bt_cursor_t        *cursor,
+       const int               p_level)
+{
+       xfs_da_intnode_t        *node;
+       xfs_da_intnode_t        *newnode;
+       xfs_dablk_t             dabno;
+       xfs_dabuf_t             *bp;
+       int                     bad;
+       int                     entry;
+       int                     this_level = p_level + 1;
+       bmap_ext_t              *bmp;
+       int                     nex;
+
+       /*
+        * index is currently set to point to the entry that
+        * should be processed now in this level.
+        */
+       entry = cursor->level[this_level].index;
+       node = cursor->level[this_level].bp->data;
+
+       /*
+        * if this block is out of entries, validate this
+        * block and move on to the next block.
+        * and update cursor value for said level
+        */
+       if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT))  {
+               /*
+                * update the hash value for this level before
+                * validating it.  bno value should be ok since
+                * it was set when the block was first read in.
+                */
+               cursor->level[this_level].hashval =
+                               INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT);
+
+               /*
+                * keep track of greatest block # -- that gets
+                * us the length of the directory
+                */
+               if (cursor->level[this_level].bno > cursor->greatest_bno)
+                       cursor->greatest_bno = cursor->level[this_level].bno;
+
+               /*
+                * validate the path for the current used-up block
+                * before we trash it
+                */
+               if (verify_dir2_path(mp, cursor, this_level))
+                       return(1);
+               /*
+                * ok, now get the next buffer and check sibling pointers
+                */
+               dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT);
+               ASSERT(dabno != 0);
+               nex = blkmap_getn(cursor->blkmap, dabno, mp->m_dirblkfsbs,
+                       &bmp);
+               if (nex == 0) {
+                       do_warn("can't get map info for block %u of directory "
+                               "inode %llu\n",
+                               dabno, cursor->ino);
+                       return(1);
+               }
+
+               bp = da_read_buf(mp, nex, bmp);
+               /*
+                * the extent list is only needed for the read; free it
+                * here, as traverse_int_dir2block() does, so it is not
+                * leaked on every block boundary crossing.
+                */
+               free(bmp);
+
+               if (bp == NULL) {
+                       do_warn("can't read block %u for directory inode "
+                               "%llu\n",
+                               dabno, cursor->ino);
+                       return(1);
+               }
+
+               newnode = bp->data;
+               /*
+                * verify magic number and back pointer, sanity-check
+                * entry count, verify level
+                */
+               bad = 0;
+               if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+                       do_warn("bad magic number %x in block %u for directory "
+                               "inode %llu\n",
+                               INT_GET(newnode->hdr.info.magic, ARCH_CONVERT), dabno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) != cursor->level[this_level].bno)  {
+                       do_warn("bad back pointer in block %u for directory "
+                               "inode %llu\n",
+                               dabno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp))  {
+                       do_warn("entry count %d too large in block %u for "
+                               "directory inode %llu\n",
+                               INT_GET(newnode->hdr.count, ARCH_CONVERT), dabno, cursor->ino);
+                       bad++;
+               }
+               if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level)  {
+                       do_warn("bad level %d in block %u for directory inode "
+                               "%llu\n",
+                               INT_GET(newnode->hdr.level, ARCH_CONVERT), dabno, cursor->ino);
+                       bad++;
+               }
+               if (bad)  {
+                       /* the old block stays in the cursor; drop the new one */
+                       da_brelse(bp);
+                       return(1);
+               }
+               /*
+                * update cursor, write out the *current* level if
+                * required.  don't write out the descendant level
+                */
+               ASSERT(cursor->level[this_level].dirty == 0 ||
+                       (cursor->level[this_level].dirty && !no_modify));
+
+               if (cursor->level[this_level].dirty && !no_modify)
+                       da_bwrite(mp, cursor->level[this_level].bp);
+               else
+                       da_brelse(cursor->level[this_level].bp);
+               cursor->level[this_level].bp = bp;
+               cursor->level[this_level].dirty = 0;
+               cursor->level[this_level].bno = dabno;
+               cursor->level[this_level].hashval = INT_GET(newnode->btree[0].hashval, ARCH_CONVERT);
+               /* the old block is gone; only the new block is used below */
+               node = newnode;
+
+               entry = cursor->level[this_level].index = 0;
+       }
+       /*
+        * the descendant's block number must match the block pointer
+        * in the current entry
+        */
+       if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT))  {
+               return(1);
+       }
+       /*
+        * ok, now validate last hashvalue in the descendant
+        * block against the hashval in the current entry
+        */
+       if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT))  {
+               if (!no_modify)  {
+                       do_warn("correcting bad hashval in interior dir "
+                               "block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+                       INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval);
+                       cursor->level[this_level].dirty++;
+               } else  {
+                       do_warn("would correct bad hashval in interior dir "
+                               "block\n");
+                       do_warn("\tin (level %d) in inode %llu.\n",
+                               this_level, cursor->ino);
+               }
+       }
+       /*
+        * increment index for this level to point to next entry
+        * (which should point to the next descendant block)
+        */
+       cursor->level[this_level].index++;
+       return(0);
+}
+
+/*
+ * Rewrite a shortform directory in place after its header's i8count
+ * has gone to zero.
+ *
+ * A temporary copy of the bytes from sfp up to *next_sfep is taken,
+ * then the header (count, parent inode number) and each entry
+ * (namelen, offset, name, inode number) are re-encoded from the copy
+ * into sfp with hdr.i8count cleared.
+ *
+ * Nothing is returned; on exit *next_sfep points just past the last
+ * rewritten entry.
+ */
+void
+process_sf_dir2_fixi8(
+       xfs_dir2_sf_t           *sfp,
+       xfs_dir2_sf_entry_t     **next_sfep)
+{
+       xfs_dir2_sf_t           *src;
+       xfs_dir2_sf_t           *dst;
+       xfs_dir2_sf_entry_t     *src_ent;
+       xfs_dir2_sf_entry_t     *dst_ent;
+       xfs_ino_t               inum;
+       int                     nbytes;
+
+       dst = sfp;
+       nbytes = (__psint_t)*next_sfep - (__psint_t)sfp;
+
+       /* snapshot the old layout so it can be read while sfp is rewritten */
+       src = malloc(nbytes);
+       if (src == NULL) {
+               do_error("couldn't malloc dir2 shortform copy\n");
+               exit(1);
+       }
+       memmove(src, dst, nbytes);
+
+       /* header: same entry count, i8count cleared, parent re-encoded */
+       INT_SET(dst->hdr.count, ARCH_CONVERT, INT_GET(src->hdr.count, ARCH_CONVERT));
+       dst->hdr.i8count = 0;
+       inum = XFS_DIR2_SF_GET_INUMBER_ARCH(src, &src->hdr.parent, ARCH_CONVERT);
+       XFS_DIR2_SF_PUT_INUMBER_ARCH(dst, &inum, &dst->hdr.parent, ARCH_CONVERT);
+
+       /* entries: walk the copy and the destination in step */
+       for (src_ent = XFS_DIR2_SF_FIRSTENTRY(src),
+               dst_ent = XFS_DIR2_SF_FIRSTENTRY(dst);
+            (int)((char *)src_ent - (char *)src) < nbytes;
+            src_ent = XFS_DIR2_SF_NEXTENTRY(src, src_ent),
+               dst_ent = XFS_DIR2_SF_NEXTENTRY(dst, dst_ent)) {
+               dst_ent->namelen = src_ent->namelen;
+               XFS_DIR2_SF_PUT_OFFSET_ARCH(dst_ent,
+                       XFS_DIR2_SF_GET_OFFSET_ARCH(src_ent, ARCH_CONVERT), ARCH_CONVERT);
+               memmove(dst_ent->name, src_ent->name, dst_ent->namelen);
+               inum = XFS_DIR2_SF_GET_INUMBER_ARCH(src,
+                       XFS_DIR2_SF_INUMBERP(src_ent), ARCH_CONVERT);
+               XFS_DIR2_SF_PUT_INUMBER_ARCH(dst, &inum,
+                       XFS_DIR2_SF_INUMBERP(dst_ent), ARCH_CONVERT);
+       }
+
+       *next_sfep = dst_ent;
+       free(src);
+}
+
+/*
+ * Regenerate legal (minimal) offsets for the shortform directory:
+ * starting at XFS_DIR2_DATA_FIRST_OFFSET, each of the hdr.count
+ * entries gets the running offset, which then advances by the data
+ * entry size for that entry's name length.
+ */
+static void
+process_sf_dir2_fixoff(
+       xfs_dinode_t    *dip)
+{
+       xfs_dir2_sf_t           *sfp;
+       xfs_dir2_sf_entry_t     *sfep;
+       int                     next_off;
+       int                     n;
+
+       sfp = &dip->di_u.di_dir2sf;
+       sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+       next_off = XFS_DIR2_DATA_FIRST_OFFSET;
+
+       n = 0;
+       while (n < INT_GET(sfp->hdr.count, ARCH_CONVERT)) {
+               XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, next_off, ARCH_CONVERT);
+               next_off += XFS_DIR2_DATA_ENTSIZE(sfep->namelen);
+
+               sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep);
+               n++;
+       }
+}
+
+/*
+ * this routine performs inode discovery and tries to fix things
+ * in place.  available redundancy -- inode data size should match
+ * used directory space in inode.
+ * a non-zero return value means the directory is bogus and should be blasted.
+ */
+/* ARGSUSED */
+static int
+process_sf_dir2(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,    /* out - 1 if dinode buffer dirty */
+       char            *dirname,       /* directory pathname */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry not exist */
+       int             *repair)        /* out - 1 if dir was fixed up */
+{
+       int                     bad_offset;
+       int                     bad_sfnamelen;
+       int                     i;
+       int                     i8;
+       __int64_t               ino_dir_size;
+       int                     ino_off;
+       ino_tree_node_t         *irec_p;
+       int                     junkit;
+       char                    *junkreason = NULL;
+       xfs_ino_t               lino;
+       int                     max_size;
+       char                    name[MAXNAMELEN + 1];
+       int                     namelen;
+       xfs_dir2_sf_entry_t     *next_sfep;
+       int                     num_entries;
+       int                     offset;
+       xfs_dir2_sf_t           *sfp;
+       xfs_dir2_sf_entry_t     *sfep;
+       int                     tmp_elen;
+       int                     tmp_len;
+       xfs_dir2_sf_entry_t     *tmp_sfep;
+       xfs_ino_t               zero = 0;
+
+       sfp = &dip->di_u.di_dir2sf;
+       max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT);
+       num_entries = INT_GET(sfp->hdr.count, ARCH_CONVERT);
+       ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+       offset = XFS_DIR2_DATA_FIRST_OFFSET;
+       i8 = bad_offset = *repair = 0;
+
+       ASSERT(ino_dir_size <= max_size);
+
+       /* 
+        * check for bad entry count
+        */
+       if (num_entries * XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, 1) +
+                   XFS_DIR2_SF_HDR_SIZE(0) > max_size ||
+           num_entries == 0)
+               num_entries = 0xFF;
+
+       /*
+        * run through entries, stop at first bad entry, don't need
+        * to check for .. since that's encoded in its own field
+        */
+       sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+       for (i = 0;
+            i < num_entries && ino_dir_size > (char *)next_sfep - (char *)sfp;
+            i++) {
+               tmp_sfep = NULL;
+               sfep = next_sfep;
+               junkit = 0;
+               bad_sfnamelen = 0;
+               lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+               /*
+                * if entry points to self, junk it since only '.' or '..'
+                * should do that and shortform dirs don't contain either
+                * entry.  if inode number is invalid, trash entry.
+                * if entry points to special inodes, trash it.
+                * if inode is unknown but number is valid,
+                * add it to the list of uncertain inodes.  don't
+                * have to worry about an entry pointing to a
+                * deleted lost+found inode because the entry was
+                * deleted at the same time that the inode was cleared.
+                */
+               if (lino == ino) {
+                       junkit = 1;
+                       junkreason = "current";
+               } else if (verify_inum(mp, lino)) {
+                       junkit = 1;
+                       junkreason = "invalid";
+               } else if (lino == mp->m_sb.sb_rbmino)  {
+                       junkit = 1;
+                       junkreason = "realtime bitmap";
+               } else if (lino == mp->m_sb.sb_rsumino)  {
+                       junkit = 1;
+                       junkreason = "realtime summary";
+               } else if (lino == mp->m_sb.sb_uquotino)  {
+                       junkit = 1;
+                       junkreason = "user quota";
+               } else if (lino == mp->m_sb.sb_pquotino)  {
+                       junkit = 1;
+                       junkreason = "project quota";
+               } else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+                                       XFS_INO_TO_AGINO(mp, lino))) != NULL) {
+                       /*
+                        * if inode is marked free and we're in inode
+                        * discovery mode, leave the entry alone for now.
+                        * if the inode turns out to be used, we'll figure
+                        * that out when we scan it.  If the inode really
+                        * is free, we'll hit this code again in phase 4
+                        * after we've finished inode discovery and blow
+                        * out the entry then.
+                        */
+                       ino_off = XFS_INO_TO_AGINO(mp, lino) -
+                               irec_p->ino_startnum;
+                       ASSERT(is_inode_confirmed(irec_p, ino_off));
+                       if (is_inode_free(irec_p, ino_off) && !ino_discovery) {
+                               junkit = 1;
+                               junkreason = "free";
+                       }
+               } else if (ino_discovery) {
+                       /*
+                        * put the inode on the uncertain list.  we'll
+                        * pull the inode off the list and check it later.
+                        * if the inode turns out be bogus, we'll delete
+                        * this entry in phase 6.
+                        */
+                       add_inode_uncertain(mp, lino, 0);
+               } else  {
+                       /*
+                        * blow the entry out.  we know about all
+                        * undiscovered entries now (past inode discovery
+                        * phase) so this is clearly a bogus entry.
+                        */
+                       junkit = 1;
+                       junkreason = "non-existent";
+               }
+               namelen = sfep->namelen;
+               if (junkit)
+                       do_warn("entry \"%*.*s\" in shortform directory %llu "
+                               "references %s inode %llu\n",
+                               namelen, namelen, sfep->name, ino, junkreason,
+                               lino);
+               if (namelen == 0)  {
+                       /*
+                        * if we're really lucky, this is
+                        * the last entry in which case we
+                        * can use the dir size to set the
+                        * namelen value.  otherwise, forget
+                        * it because we're not going to be
+                        * able to find the next entry.
+                        */
+                       bad_sfnamelen = 1;
+
+                       if (i == num_entries - 1)  {
+                               namelen = ino_dir_size -
+                                       ((__psint_t) &sfep->name[0] -
+                                        (__psint_t) sfp);
+                               if (!no_modify)  {
+                                       do_warn("zero length entry in "
+                                               "shortform dir %llu, resetting "
+                                               "to %d\n",
+                                               ino, namelen);
+                                       sfep->namelen = namelen;
+                               } else  {
+                                       do_warn("zero length entry in "
+                                               "shortform dir %llu, would set "
+                                               "to %d\n",
+                                               ino, namelen);
+                               }
+                       } else  {
+                               do_warn("zero length entry in shortform dir "
+                                       "%llu",
+                                       ino);
+                               if (!no_modify)
+                                       do_warn(", junking %d entries\n",
+                                               num_entries - i);
+                               else
+                                       do_warn(", would junk %d entries\n",
+                                               num_entries - i);
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing looop
+                                */
+                               break;
+                       }
+               } else if ((__psint_t) sfep - (__psint_t) sfp +
+                               + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+                               > ino_dir_size)  {
+                       bad_sfnamelen = 1;
+
+                       if (i == num_entries - 1)  {
+                               namelen = ino_dir_size -
+                                       ((__psint_t) &sfep->name[0] -
+                                        (__psint_t) sfp);
+                               do_warn("size of last entry overflows space "
+                                       "left in in shortform dir %llu, ",
+                                       ino);
+                               if (!no_modify)  {
+                                       do_warn("resetting to %d\n",
+                                               namelen);
+                                       sfep->namelen = namelen;
+                                       *dino_dirty = 1;
+                               } else  {
+                                       do_warn("would reset to %d\n",
+                                               namelen);
+                               }
+                       } else  {
+                               do_warn("size of entry #%d overflows space "
+                                       "left in in shortform dir %llu\n",
+                                       i, ino);
+                               if (!no_modify)  {
+                                       if (i == num_entries - 1)
+                                               do_warn("junking entry #%d\n",
+                                                       i);
+                                       else
+                                               do_warn("junking %d entries\n",
+                                                       num_entries - i);
+                               } else  {
+                                       if (i == num_entries - 1)
+                                               do_warn("would junk entry "
+                                                       "#%d\n",
+                                                       i);
+                                       else
+                                               do_warn("would junk %d "
+                                                       "entries\n",
+                                                       num_entries - i);
+                               }
+
+                               break;
+                       }
+               }
+
+               /*
+                * check for illegal chars in name.
+                * no need to check for bad length because
+                * the length value is stored in a byte
+                * so it can't be too big, it can only wrap
+                */
+               if (namecheck((char *)&sfep->name[0], namelen))  {
+                       /*
+                        * junk entry
+                        */
+                       do_warn("entry contains illegal character in shortform "
+                               "dir %llu\n",
+                               ino);
+                       junkit = 1;
+               }
+
+               if (XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) < offset) {
+                       do_warn("entry contains offset out of order in "
+                               "shortform dir %llu\n",
+                               ino);
+                       bad_offset = 1;
+               }
+               offset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) +
+                        XFS_DIR2_DATA_ENTSIZE(namelen);
+
+               /*
+                * junk the entry by copying up the rest of the
+                * fork over the current entry and decrementing
+                * the entry count.  if we're in no_modify mode,
+                * just issue the warning instead.  then continue
+                * the loop with the next_sfep pointer set to the
+                * correct place in the fork and other counters
+                * properly set to reflect the deletion if it
+                * happened.
+                */
+               if (junkit)  {
+                       bcopy(sfep->name, name, namelen);
+                       name[namelen] = '\0';
+
+                       if (!no_modify)  {
+                               tmp_elen =
+                                       XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep);
+                               INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+                               ino_dir_size -= tmp_elen;
+
+                               tmp_sfep = (xfs_dir2_sf_entry_t *)
+                                       ((__psint_t) sfep + tmp_elen);
+                               tmp_len = max_size - ((__psint_t) tmp_sfep
+                                                       - (__psint_t) sfp);
+
+                               memmove(sfep, tmp_sfep, tmp_len);
+
+                               INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1);
+                               num_entries--;
+                               bzero((void *) ((__psint_t) sfep + tmp_len),
+                                       tmp_elen);
+
+                               /*
+                                * reset the tmp value to the current
+                                * pointer so we'll process the entry
+                                * we just moved up
+                                */
+                               tmp_sfep = sfep;
+
+                               /*
+                                * WARNING:  drop the index i by one
+                                * so it matches the decremented count
+                                * for accurate comparisons later
+                                */
+                               i--;
+
+                               *dino_dirty = 1;
+                               *repair = 1;
+
+                               do_warn("junking entry \"%s\" in directory "
+                                       "inode %llu\n",
+                                       name, ino);
+                       } else  {
+                               do_warn("would have junked entry \"%s\" in "
+                                       "directory inode %llu\n",
+                                       name, ino);
+                       }
+               } else if (lino > XFS_DIR2_MAX_SHORT_INUM)
+                       i8++;
+               /*
+                * go onto next entry unless we've just junked an
+                * entry in which the current entry pointer points
+                * to an unprocessed entry.  have to take into zero-len
+                * entries into account in no modify mode since we
+                * calculate size based on next_sfep.
+                */
+               next_sfep = (tmp_sfep == NULL)
+                       ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
+                               + ((!bad_sfnamelen)
+                                       ? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,
+                                               sfep)
+                                       : XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,
+                                               namelen)))
+                       : tmp_sfep;
+       }
+
+       /* sync up sizes and entry counts */
+
+       if (INT_GET(sfp->hdr.count, ARCH_CONVERT) != i) {
+               if (no_modify) {
+                       do_warn("would have corrected entry count in directory "
+                               "%llu from %d to %d\n",
+                               ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i);
+               } else {
+                       do_warn("corrected entry count in directory %llu, was "
+                               "%d, now %d\n",
+                               ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i);
+                       INT_SET(sfp->hdr.count, ARCH_CONVERT, i);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+
+       if (sfp->hdr.i8count != i8)  {
+               if (no_modify)  {
+                       do_warn("would have corrected i8 count in directory "
+                               "%llu from %d to %d\n",
+                               ino, sfp->hdr.i8count, i8);
+               } else {
+                       do_warn("corrected i8 count in directory %llu, was %d, "
+                               "now %d\n",
+                               ino, sfp->hdr.i8count, i8);
+                       if (i8 == 0)
+                               process_sf_dir2_fixi8(sfp, &next_sfep);
+                       else
+                               sfp->hdr.i8count = i8;
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+
+       if ((__psint_t) next_sfep - (__psint_t) sfp != ino_dir_size)  {
+               if (no_modify)  {
+                       do_warn("would have corrected directory %llu size from "
+                               "%lld to %lld\n",
+                               ino, (__int64_t) ino_dir_size,
+                               (__int64_t)((__psint_t)next_sfep -
+                                           (__psint_t)sfp));
+               } else  {
+                       do_warn("corrected directory %llu size, was %lld, now "
+                               "%lld\n",
+                               ino, (__int64_t) ino_dir_size,
+                               (__int64_t)((__psint_t)next_sfep -
+                                           (__psint_t)sfp));
+
+                       INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)((__psint_t)next_sfep -
+                                             (__psint_t)sfp));
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+       if (offset + (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) +
+           sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) {
+               do_warn("directory %llu offsets too high\n", ino);
+               bad_offset = 1;
+       }
+       if (bad_offset) {
+               if (no_modify) {
+                       do_warn("would have corrected entry offsets in "
+                               "directory %llu\n",
+                               ino);
+               } else {
+                       do_warn("corrected entry offsets in directory %llu\n",
+                               ino);
+                       process_sf_dir2_fixoff(dip);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               }
+       }
+
+       /*
+        * check parent (..) entry
+        */
+       *parent = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT);
+
+       /*
+        * if parent entry is bogus, null it out.  we'll fix it later.
+        */
+       if (verify_inum(mp, *parent))  {
+
+               do_warn("bogus .. inode number (%llu) in directory inode "
+                       "%llu, ",
+                               *parent, ino);
+               *parent = NULLFSINO;
+               if (!no_modify)  {
+                       do_warn("clearing inode number\n");
+
+                       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn("would clear inode number\n");
+               }
+       } else if (ino == mp->m_sb.sb_rootino && ino != *parent) {
+               /*
+                * root directories must have .. == .
+                */
+               if (!no_modify)  {
+                       do_warn("corrected root directory %llu .. entry, was "
+                               "%llu, now %llu\n",
+                               ino, *parent, ino);
+                       *parent = ino;
+                       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, parent, &sfp->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn("would have corrected root directory %llu .. "
+                               "entry from %llu to %llu\n",
+                               ino, *parent, ino);
+               }
+       } else if (ino == *parent && ino != mp->m_sb.sb_rootino)  {
+               /*
+                * likewise, non-root directories can't have .. pointing
+                * to .
+                */
+               *parent = NULLFSINO;
+               do_warn("bad .. entry in directory inode %llu, points to "
+                       "self,",
+                       ino);
+               if (!no_modify)  {
+                       do_warn(" clearing inode number\n");
+
+                       XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT);
+                       *dino_dirty = 1;
+                       *repair = 1;
+               } else  {
+                       do_warn(" would clear inode number\n");
+               }
+       }
+
+       return(0);
+}
+
+/*
+ * Process one directory data block: validate its layout, then vet each entry.
+ */
+/* ARGSUSED */
+static int
+process_dir2_data(
+       xfs_mount_t     *mp,            /* mount; m_sb gives special inode #s */
+       xfs_ino_t       ino,            /* inode number of this directory */
+       xfs_dinode_t    *dip,           /* not referenced in this function */
+       int             ino_discovery,  /* nonzero while discovering inodes */
+       char            *dirname,       /* directory pathname (unused here) */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry not exist */
+       xfs_dabuf_t     *bp,            /* block buffer; ->dirty set on fixes */
+       int             *dot,           /* out - 1 if there is a dot, else 0 */
+       int             *dotdot,        /* out - 1 if there's a dotdot, else 0 */
+       xfs_dablk_t     da_bno,         /* block number, used in messages */
+       char            *endptr)        /* end of the area to scan */
+{
+       int                     badbest;        /* nonzero if bestfree table looks wrong */
+       xfs_dir2_data_free_t    *bf;            /* bestfree table in block header */
+       int                     clearino;       /* 1 if entry's inode # must be cleared */
+       char                    *clearreason = NULL;    /* word for the warning text */
+       xfs_dir2_data_t         *d;             /* the block, from bp->data */
+       xfs_dir2_data_entry_t   *dep;           /* current entry */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree slot for an unused region */
+       xfs_dir2_data_unused_t  *dup;           /* current region viewed as unused */
+       int                     freeseen;       /* bitmask of bestfree slots seen */
+       int                     i;
+       int                     ino_off;        /* inode offset within its inode rec */
+       ino_tree_node_t         *irec_p;        /* inode rec from find_inode_rec() */
+       int                     junkit;         /* 1 if entry is to be marked junk */
+       int                     lastfree;       /* 1 if previous region was unused */
+       int                     nm_illegal;     /* namecheck() result for the name */
+       char                    *ptr;           /* scan cursor within the block */
+
+       d = bp->data;
+       bf = d->hdr.bestfree;
+       ptr = (char *)d->u;
+       badbest = lastfree = freeseen = 0;
+       if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {  /* empty slot: offset must be 0 */
+               badbest |= INT_GET(bf[0].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 0;
+       }
+       if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+               badbest |= INT_GET(bf[1].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 1;
+       }
+       if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+               badbest |= INT_GET(bf[2].offset, ARCH_CONVERT) != 0;
+               freeseen |= 1 << 2;
+       }
+       badbest |= INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT);
+       badbest |= INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT);  /* lengths must not increase */
+       while (ptr < endptr) {  /* pass 1: check structure and bestfree */
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               /*
+                * If it's unused, look for the space in the bestfree table.
+                * If we find it, account for that, else make sure it doesn't
+                * need to be there.
+                */
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+                           (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)))
+                               break;
+                       if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) !=
+                           (char *)dup - (char *)d)
+                               break;
+                       badbest |= lastfree != 0;       /* previous region was unused too */
+                       dfp = xfs_dir2_data_freefind(d, dup);
+                       if (dfp) {
+                               i = dfp - bf;   /* index of the matching slot */
+                               badbest |= (freeseen & (1 << i)) != 0;  /* slot hit twice */
+                               freeseen |= 1 << i;
+                       } else
+                               badbest |= INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT);
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       lastfree = 1;
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr)
+                       break;
+               if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d)
+                       break;
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               lastfree = 0;
+       }
+       /*
+        * Dropped out before we processed everything, give up.
+        * Phase 6 will kill this block if we don't kill the inode.
+        */
+       if (ptr != endptr) {
+               do_warn("corrupt block %u in directory inode %llu\n",
+                       da_bno, ino);
+               if (!no_modify)
+                       do_warn("\twill junk block\n");
+               else
+                       do_warn("\twould junk block\n");
+               return 1;       /* block is structurally corrupt */
+       }
+       ptr = (char *)d->u;     /* rewind for the second pass */
+       /*
+        * Process the entries now.
+        */
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);      /* skip unused space */
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               /*
+                * We may have to blow out an entry because of bad inode
+                * numbers.  Do NOT touch the name until after we've computed
+                * the hashvalue and done a namecheck() on the name.
+                */
+               if (!ino_discovery && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) {
+                       /*
+                        * Don't do a damned thing.  We already found this
+                        * (or did it ourselves) during phase 3.
+                        */
+                       clearino = 0;
+               } else if (verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT))) {
+                       /*
+                        * Bad inode number.  Clear the inode number and the
+                        * entry will get removed later.  We don't trash the
+                        * directory since it's still structurally intact.
+                        */
+                       clearino = 1;
+                       clearreason = "invalid";
+               } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rbmino) {
+                       clearino = 1;
+                       clearreason = "realtime bitmap";
+               } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rsumino) {
+                       clearino = 1;
+                       clearreason = "realtime summary";
+               } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_uquotino) {
+                       clearino = 1;
+                       clearreason = "user quota";
+               } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_pquotino) {
+                       clearino = 1;
+                       clearreason = "project quota";
+               } else if (INT_GET(dep->inumber, ARCH_CONVERT) == old_orphanage_ino) {
+                       /*
+                        * Do nothing, silently ignore it, entry has already
+                        * been marked TBD since old_orphanage_ino is set
+                        * non-zero.
+                        */
+                       clearino = 0;
+               } else if ((irec_p = find_inode_rec(
+                               XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)),
+                               XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)))) != NULL) {
+                       /*
+                        * Inode recs should have only confirmed inodes in them.
+                        */
+                       ino_off =
+                               XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) -
+                               irec_p->ino_startnum;
+                       ASSERT(is_inode_confirmed(irec_p, ino_off));
+                       /*
+                        * If inode is marked free and we're in inode discovery
+                        * mode, leave the entry alone for now.  If the inode
+                        * turns out to be used, we'll figure that out when we
+                        * scan it.  If the inode really is free, we'll hit this
+                        * code again in phase 4 after we've finished inode
+                        * discovery and blow out the entry then.
+                        */
+                       if (!ino_discovery && is_inode_free(irec_p, ino_off)) {
+                               clearino = 1;
+                               clearreason = "free";
+                       } else
+                               clearino = 0;
+               } else if (ino_discovery) {
+                       add_inode_uncertain(mp, INT_GET(dep->inumber, ARCH_CONVERT), 0);
+                       clearino = 0;
+               } else {
+                       clearino = 1;
+                       clearreason = "non-existent";
+               }
+               if (clearino)
+                       do_warn("entry \"%*.*s\" at block %u offset %d in "
+                               "directory inode %llu references %s inode "
+                               "%llu\n",
+                               dep->namelen, dep->namelen, dep->name,
+                               da_bno, (char *)ptr - (char *)d, ino,   /* NOTE(review): ptr difference passed for %d - confirm width */
+                               clearreason, INT_GET(dep->inumber, ARCH_CONVERT));
+               /*
+                * If the name length is 0 (illegal) make it 1 and blast
+                * the entry.
+                */
+               if (dep->namelen == 0) {
+                       do_warn("entry at block %u offset %d in directory "
+                               "inode %llu has 0 namelength\n",
+                               da_bno, (char *)ptr - (char *)d, ino);
+                       if (!no_modify)
+                               dep->namelen = 1;
+                       clearino = 1;
+               }
+               /*
+                * If needed to clear the inode number, do it now.
+                */
+               if (clearino) {
+                       if (!no_modify) {
+                               do_warn("\tclearing inode number in entry at "
+                                       "offset %d...\n",
+                                       (char *)ptr - (char *)d);
+                               INT_SET(dep->inumber, ARCH_CONVERT, BADFSINO);
+                               bp->dirty = 1;
+                       } else {
+                               do_warn("\twould clear inode number in entry "
+                                       "at offset %d...\n",
+                                       (char *)ptr - (char *)d);
+                       }
+               }
+               /*
+                * Only complain about illegal names in phase 3 (when inode
+                * discovery is turned on).  Otherwise, we'd complain a lot
+                * during phase 4.
+                */
+               junkit = INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO;
+               nm_illegal = namecheck((char *)dep->name, dep->namelen);
+               if (ino_discovery && nm_illegal) {
+                       do_warn("entry at block %u offset %d in directory "
+                               "inode %llu has illegal name \"%*.*s\": ",
+                               da_bno, (char *)ptr - (char *)d, ino,
+                               dep->namelen, dep->namelen, dep->name);
+                       junkit = 1;
+               }
+               /*
+                * Now we can mark entries with BADFSINO's bad ('/' as first name byte).
+                */
+               if (!no_modify && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) {
+                       dep->name[0] = '/';
+                       bp->dirty = 1;
+                       junkit = 0;     /* marked here, so skip "clearing entry" below */
+               }
+               /*
+                * Special .. entry processing.
+                */
+               if (dep->namelen == 2 &&
+                   dep->name[0] == '.' && dep->name[1] == '.') {
+                       if (!*dotdot) {
+                               (*dotdot)++;
+                               *parent = INT_GET(dep->inumber, ARCH_CONVERT);
+                               /*
+                                * What if .. == .?  Legal only in the root
+                                * inode.  Blow out entry and set parent to
+                                * NULLFSINO otherwise.
+                                */
+                               if (ino == INT_GET(dep->inumber, ARCH_CONVERT) &&
+                                   ino != mp->m_sb.sb_rootino) {
+                                       *parent = NULLFSINO;
+                                       do_warn("bad .. entry in directory "
+                                               "inode %llu, points to self: ",
+                                               ino);
+                                       junkit = 1;
+                               }
+                               /*
+                                * We have to make sure that . == .. in the
+                                * root inode.
+                                */
+                               else if (ino != INT_GET(dep->inumber, ARCH_CONVERT) &&
+                                          ino == mp->m_sb.sb_rootino) {
+                                       do_warn("bad .. entry in root "
+                                               "directory inode %llu, was "
+                                               "%llu: ",
+                                               ino, INT_GET(dep->inumber, ARCH_CONVERT));
+                                       if (!no_modify) {
+                                               do_warn("correcting\n");
+                                               INT_SET(dep->inumber, ARCH_CONVERT, ino);
+                                               bp->dirty = 1;
+                                       } else {
+                                               do_warn("would correct\n");
+                                       }
+                               }
+                       }
+                       /*
+                        * Can't fix the directory unless we know which ..
+                        * entry is the right one.  Both have valid inode
+                        * numbers or we wouldn't be here.  So since both
+                        * seem equally valid, trash this one.
+                        */
+                       else {
+                               do_warn("multiple .. entries in directory "
+                                       "inode %llu: ",
+                                       ino);
+                               junkit = 1;
+                       }
+               }
+               /*
+                * Special . entry processing.
+                */
+               else if (dep->namelen == 1 && dep->name[0] == '.') {
+                       if (!*dot) {
+                               (*dot)++;
+                               if (INT_GET(dep->inumber, ARCH_CONVERT) != ino) {
+                                       do_warn("bad . entry in directory "
+                                               "inode %llu, was %llu: ",
+                                               ino, INT_GET(dep->inumber, ARCH_CONVERT));
+                                       if (!no_modify) {
+                                               do_warn("correcting\n");
+                                               INT_SET(dep->inumber, ARCH_CONVERT, ino);
+                                               bp->dirty = 1;
+                                       } else {
+                                               do_warn("would correct\n");
+                                       }
+                               }
+                       } else {
+                               do_warn("multiple . entries in directory "
+                                       "inode %llu: ",
+                                       ino);
+                               junkit = 1;
+                       }
+               }
+               /*
+                * All other entries -- make sure only . references self.
+                */
+               else if (INT_GET(dep->inumber, ARCH_CONVERT) == ino) {
+                       do_warn("entry \"%*.*s\" in directory inode %llu "
+                               "points to self: ",
+                               dep->namelen, dep->namelen, dep->name, ino);
+                       junkit = 1;
+               }
+               /*
+                * Clear junked entries.
+                */
+               if (junkit) {
+                       if (!no_modify) {
+                               dep->name[0] = '/';
+                               bp->dirty = 1;
+                               do_warn("clearing entry\n");
+                       } else {
+                               do_warn("would clear entry\n");
+                       }
+               }
+               /*
+                * Advance to the next entry.
+                */
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+       }
+       /*
+        * Check the bestfree table.
+        */
+       if (freeseen != 7 || badbest) { /* 7 == all three slots accounted for */
+               do_warn("bad bestfree table in block %u in directory inode "
+                       "%llu: ",
+                       da_bno, ino);
+               if (!no_modify) {
+                       do_warn("repairing table\n");
+                       libxfs_dir2_data_freescan(mp, d, &i, endptr);
+                       bp->dirty = 1;
+               } else {
+                       do_warn("would repair table\n");
+               }
+       }
+       return 0;       /* structure ok; any fixes are flagged in bp->dirty */
+}
+
+/*
+ * Process a block-format directory: read the block at m_dirdatablk, check it.
+ */
+/* ARGSUSED */
+static int
+process_block_dir2(
+       xfs_mount_t     *mp,            /* mount; gives m_dirdatablk etc. */
+       xfs_ino_t       ino,            /* inode number of this directory */
+       xfs_dinode_t    *dip,           /* passed on to process_dir2_data() */
+       int             ino_discovery,  /* passed on to process_dir2_data() */
+       int             *dino_dirty,    /* out - 1 if dinode buffer dirty (not written here) */
+       char            *dirname,       /* directory pathname */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry not exist */
+       blkmap_t        *blkmap,        /* handed to blkmap_getn() to find the block */
+       int             *dot,           /* out - 1 if there is a dot, else 0 */
+       int             *dotdot,        /* out - 1 if there's a dotdot, else 0 */
+       int             *repair)        /* out - 1 if something was fixed */
+{
+       xfs_dir2_block_t        *block;         /* the block, from bp->data */
+       xfs_dir2_leaf_entry_t   *blp;           /* passed below as the scan end */
+       bmap_ext_t              *bmp;           /* set by blkmap_getn(), freed below */
+       xfs_dabuf_t             *bp;            /* buffer from da_read_buf() */
+       xfs_dir2_block_tail_t   *btp;           /* from XFS_DIR2_BLOCK_TAIL_P() */
+       int                     nex;            /* blkmap_getn() result; 0 == missing */
+       int                     rval;           /* result of process_dir2_data() */
+
+       *repair = *dot = *dotdot = 0;
+       *parent = NULLFSINO;
+       nex = blkmap_getn(blkmap, mp->m_dirdatablk, mp->m_dirblkfsbs, &bmp);
+       if (nex == 0) {
+               do_warn("block %u for directory inode %llu is missing\n",
+                       mp->m_dirdatablk, ino);
+               return 1;
+       }
+       bp = da_read_buf(mp, nex, bmp);
+       free(bmp);
+       if (bp == NULL) {
+               do_warn("can't read block %u for directory inode %llu\n",
+                       mp->m_dirdatablk, ino);
+               return 1;
+       }
+       /*
+        * Verify the block
+        */
+       block = bp->data;
+       if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)    /* warn only; processing continues */
+               do_warn("bad directory block magic # %#x in block %u for "
+                       "directory inode %llu\n",
+                       INT_GET(block->hdr.magic, ARCH_CONVERT), mp->m_dirdatablk, ino);
+       /*
+        * process the data area
+        * this also checks & fixes the bestfree
+        */
+       btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+       blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       /*
+        * Don't let this go past the end of the block.
+        */
+       if ((char *)blp > (char *)btp)
+               blp = (xfs_dir2_leaf_entry_t *)btp;
+       rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent,
+               bp, dot, dotdot, mp->m_dirdatablk, (char *)blp);        /* scan stops at blp */
+       if (bp->dirty && !no_modify) {  /* something was fixed: write it back */
+               *repair = 1;
+               da_bwrite(mp, bp);
+       } else
+               da_brelse(bp);  /* nothing to write: just release */
+       return rval;    /* 1 also returned above if block missing/unreadable */
+}
+
+/*
+ * Validates leaf contents, node format directories only.
+ * Magic number and sibling pointers are checked by the caller.
+ * Looking for: out of order hash values, bad stale counts, and an
+ * entry count that runs past the end of the directory block.
+ *
+ * last_hashval is the greatest hash value seen before this block.
+ * *next_hashval receives the hash value of the last entry examined,
+ * or last_hashval if the block holds no entries, so the caller always
+ * gets a defined value back.
+ *
+ * Returns 0 if block is ok, 1 if the block is bad.
+ */
+static int
+process_leaf_block_dir2(
+       xfs_mount_t             *mp,
+       xfs_dir2_leaf_t         *leaf,
+       xfs_dablk_t             da_bno,
+       xfs_ino_t               ino,
+       xfs_dahash_t            last_hashval,
+       xfs_dahash_t            *next_hashval)
+{
+       int                     count;
+       int                     i;
+       int                     stale;
+
+       /*
+        * An empty leaf never enters the loop below; seed the output so
+        * the caller doesn't pick up an uninitialized hash value.
+        */
+       *next_hashval = last_hashval;
+
+       count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+       for (i = stale = 0; i < count; i++) {
+               /*
+                * The count comes from disk; stop before walking off
+                * the end of the directory block.
+                */
+               if ((char *)&leaf->ents[i] >= (char *)leaf + mp->m_dirblksize) {
+                       do_warn("bad entry count in block %u of directory "
+                               "inode %llu\n",
+                               da_bno, ino);
+                       return 1;
+               }
+               /*
+                * Stale entries are only counted; they are exempt from
+                * the ordering check but still advance last_hashval.
+                */
+               if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+                       stale++;
+               else if (INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) < last_hashval) {
+                       do_warn("bad hash ordering in block %u of directory "
+                               "inode %llu\n",
+                               da_bno, ino);
+                       return 1;
+               }
+               *next_hashval = last_hashval = INT_GET(leaf->ents[i].hashval, ARCH_CONVERT);
+       }
+       if (stale != INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
+               do_warn("bad stale count in block %u of directory inode %llu\n",
+                       da_bno, ino);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Walk the leaf level of a node-format directory's btree left to right,
+ * starting at the block recorded in da_cursor->level[0].bno.
+ *
+ * Each leaf block is mapped and read, its magic number is checked, its
+ * entries are validated by process_leaf_block_dir2(), its back pointer
+ * is compared with the number of the previous block, and, when a forward
+ * sibling exists, verify_dir2_path() is called.  After the last block
+ * verify_final_dir2_path() is called.
+ *
+ * *repair is set to 1 only on the write-back path, which needs buf_dirty
+ * to be non-zero; this function only ever assigns 0 to buf_dirty.
+ *
+ * Returns 0 if the directory is ok, 1 if it has to be rebuilt.
+ */
+static int
+process_leaf_level_dir2(
+       xfs_mount_t             *mp,
+       dir2_bt_cursor_t        *da_cursor,
+       int                     *repair)
+{ 
+       bmap_ext_t              *bmp;
+       xfs_dabuf_t             *bp;
+       int                     buf_dirty;
+       xfs_dahash_t            current_hashval;
+       xfs_dablk_t             da_bno;
+       xfs_dahash_t            greatest_hashval;
+       xfs_ino_t               ino;
+       xfs_dir2_leaf_t         *leaf;
+       int                     nex;
+       xfs_dablk_t             prev_bno;
+
+       da_bno = da_cursor->level[0].bno;
+       ino = da_cursor->ino;
+       prev_bno = 0;
+       bmp = NULL;
+       current_hashval = 0;
+       buf_dirty = 0;
+
+       do {
+               nex = blkmap_getn(da_cursor->blkmap, da_bno, mp->m_dirblkfsbs,
+                       &bmp);
+               /*
+                * Directory code uses 0 as the NULL block pointer since 0
+                * is the root block and no directory block pointer can point
+                * to the root block of the btree.
+                */
+               ASSERT(da_bno != 0);
+
+               if (nex == 0) {
+                       do_warn("can't map block %u for directory inode %llu\n",
+                               da_bno, ino);
+                       goto error_out;
+               }
+               bp = da_read_buf(mp, nex, bmp);
+               /*
+                * The extent list is not needed once the read has been
+                * attempted; clear the pointer so error_out doesn't free
+                * it a second time.
+                */
+               free(bmp);
+               bmp = NULL;
+               if (bp == NULL) {
+                       do_warn("can't read file block %u for directory inode "
+                               "%llu\n",
+                               da_bno, ino);
+                       goto error_out;
+               }
+               leaf = bp->data;
+               /*
+                * Check magic number for leaf directory btree block.
+                */
+               if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) {
+                       do_warn("bad directory leaf magic # %#x for directory "
+                               "inode %llu block %u\n",
+                               INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino, da_bno);
+                       da_brelse(bp);
+                       goto error_out;
+               }
+               buf_dirty = 0;
+               /*
+                * For each block, process the block, verify its path,
+                * then get next block.  Update cursor values along the way.
+                *
+                * NOTE(review): greatest_hashval is written only through
+                * this call, and process_leaf_block_dir2() stores to it
+                * once per leaf entry, so a leaf with hdr.count == 0 looks
+                * like it leaves the value unset -- confirm.
+                */
+               if (process_leaf_block_dir2(mp, leaf, da_bno, ino,
+                               current_hashval, &greatest_hashval)) {
+                       da_brelse(bp);
+                       goto error_out;
+               }
+               /*
+                * Index can be set to hdr.count so match the indices of the
+                * interior blocks -- which at the end of the block will point
+                * to 1 after the final real entry in the block.
+                */
+               da_cursor->level[0].hashval = greatest_hashval;
+               da_cursor->level[0].bp = bp;
+               da_cursor->level[0].bno = da_bno;
+               da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+               da_cursor->level[0].dirty = buf_dirty;
+
+               /*
+                * The back pointer must name the block we just came from
+                * (0 for the left-most leaf).
+                */
+               if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno) {
+                       do_warn("bad sibling back pointer for block %u in "
+                               "directory inode %llu\n",
+                               da_bno, ino);
+                       da_brelse(bp);
+                       goto error_out;
+               }
+               prev_bno = da_bno;
+               da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+               /*
+                * A forward pointer of 0 marks the right-most leaf; the
+                * path check for that one is done after the loop.
+                */
+               if (da_bno != 0) {
+                       if (verify_dir2_path(mp, da_cursor, 0)) {
+                               da_brelse(bp);
+                               goto error_out;
+                       }
+               }
+               current_hashval = greatest_hashval;
+               ASSERT(buf_dirty == 0 || buf_dirty && !no_modify);
+               /*
+                * Write the leaf back if it was modified, otherwise just
+                * release it.
+                */
+               if (buf_dirty && !no_modify) {
+                       *repair = 1;
+                       da_bwrite(mp, bp);
+               } else
+                       da_brelse(bp);
+       } while (da_bno != 0);
+       if (verify_final_dir2_path(mp, da_cursor, 0)) {
+               /*
+                * Verify the final path up (right-hand-side) if still ok.
+                */
+               do_warn("bad hash path in directory %llu\n", ino);
+               goto error_out;
+       }
+       /*
+        * Redundant but just for testing.
+        */
+       release_dir2_cursor(mp, da_cursor, 0);
+       return 0;
+
+error_out:
+       /*
+        * Release all buffers holding interior btree blocks.
+        */
+       err_release_dir2_cursor(mp, da_cursor, 0);
+       if (bmp)
+               free(bmp);
+       return 1;
+}
+
+/*
+ * Check the leaf/node space of a node-format directory.
+ *
+ * A zeroed btree cursor is set up for this inode, the interior blocks
+ * are traversed down the left side of the tree to the left-most leaf,
+ * and the leaf level is then walked left to right with the parent path
+ * being verified as each block is crossed.
+ *
+ * Return 1 if the directory's leaf/node space is corrupted and
+ * needs to be rebuilt, 0 if it's ok.
+ */
+static int
+process_node_dir2(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       blkmap_t        *blkmap,
+       int             *repair)
+{
+       dir2_bt_cursor_t        da_cursor;
+       xfs_dablk_t             bno;
+
+       bzero(&da_cursor, sizeof(da_cursor));
+       da_cursor.blkmap = blkmap;
+       da_cursor.dip = dip;
+       da_cursor.ino = ino;
+
+       /*
+        * Process the interior nodes first; a zero return means the
+        * traversal failed.
+        */
+       if (traverse_int_dir2block(mp, &da_cursor, &bno) == 0)
+               return 1;
+
+       /*
+        * Hand cursor and bno to the leaf-level routine.  It checks the
+        * interior paths up to the root, including the final right-most
+        * path.
+        */
+       if (bno != 0)
+               return process_leaf_level_dir2(mp, &da_cursor, repair);
+
+       /*
+        * Skip directories with a root marked XFS_DIR2_LEAFN_MAGIC
+        */
+       release_dir2_cursor(mp, &da_cursor, 0);
+       return 0;
+}
+
+/*
+ * Process leaf and node directories.
+ *
+ * Every mapped data block below mp->m_dirleafblk is read and run
+ * through process_dir2_data(); blocks that can't be mapped or read
+ * are reported and skipped.  For a node directory that isn't already
+ * on the bad list, the leaf/node space is then checked as well, and
+ * the inode is added to the bad list if that check fails.
+ *
+ * Returns 1 if not a single data block checked out ok, else 0.
+ */
+static int
+process_leaf_node_dir2(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       char            *dirname,       /* directory pathname */
+       xfs_ino_t       *parent,        /* out - NULLFSINO if entry not exist */
+       blkmap_t        *blkmap,
+       int             *dot,           /* out - 1 if there is a dot, else 0 */
+       int             *dotdot,        /* out - 1 if there's a dotdot, else 0 */
+       int             *repair,        /* out - 1 if something was fixed */
+       int             isnode)         /* node directory not leaf */
+{
+       bmap_ext_t              *bmp;
+       xfs_dabuf_t             *bp;
+       xfs_dir2_data_t         *data;
+       xfs_dfiloff_t           dbno;
+       int                     good;
+       xfs_dfiloff_t           ndbno;
+       int                     nex;
+       int                     rval;
+       int                     t;
+
+       good = 0;
+       *dotdot = 0;
+       *dot = 0;
+       *repair = 0;
+       *parent = NULLFSINO;
+
+       /*
+        * Step through the data blocks; after each one, resume the
+        * search from the last filesystem block of that directory block.
+        */
+       for (ndbno = NULLDFILOFF;
+            (dbno = blkmap_next_off(blkmap, ndbno, &t)) < mp->m_dirleafblk;
+            ndbno = dbno + mp->m_dirblkfsbs - 1) {
+               nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp);
+               if (nex == 0) {
+                       do_warn("block %llu for directory inode %llu is "
+                               "missing\n",
+                               dbno, ino);
+                       continue;
+               }
+               bp = da_read_buf(mp, nex, bmp);
+               free(bmp);
+               if (bp == NULL) {
+                       do_warn("can't read block %llu for directory inode "
+                               "%llu\n",
+                               dbno, ino);
+                       continue;
+               }
+
+               /*
+                * A bad magic number is reported but the block is still
+                * processed.
+                */
+               data = bp->data;
+               if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC)
+                       do_warn("bad directory block magic # %#x in block %llu "
+                               "for directory inode %llu\n",
+                               INT_GET(data->hdr.magic, ARCH_CONVERT), dbno, ino);
+
+               rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname,
+                       parent, bp, dot, dotdot, (xfs_dablk_t)dbno,
+                       (char *)data + mp->m_dirblksize);
+               if (rval == 0)
+                       good++;
+
+               if (!bp->dirty || no_modify) {
+                       da_brelse(bp);
+               } else {
+                       *repair = 1;
+                       da_bwrite(mp, bp);
+               }
+       }
+
+       if (good == 0)
+               return 1;
+
+       /*
+        * Only node directories not yet known to be bad get their
+        * leaf/node space checked.
+        */
+       if (isnode && !dir2_is_badino(ino) &&
+           process_node_dir2(mp, ino, dip, blkmap, repair))
+               dir2_add_badlist(ino);
+       return 0;
+}
+
+/*
+ * Check a version 2 directory inode, dispatching on its size and
+ * format to the shortform, block, or leaf/node routine.
+ *
+ * Returns 1 if things are bad (directory needs to be junked)
+ * and 0 if things are ok.  If ino_discovery is 1, add unknown
+ * inodes to uncertain inode list.
+ */
+int
+process_dir2(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dino_dirty,
+       char            *dirname,
+       xfs_ino_t       *parent,
+       blkmap_t        *blkmap)
+{
+       int             dot = 0;
+       int             dotdot = 0;
+       int             is_root;
+       xfs_dfiloff_t   last = 0;
+       int             longform;
+       int             repair;
+       int             res;
+
+       *parent = NULLFSINO;
+
+       /*
+        * branch off depending on the type of inode.  This routine
+        * is only called ONCE so all the subordinate routines will
+        * fix '.' and junk '..' if they're bogus.
+        */
+       if (blkmap)
+               last = blkmap_last_off(blkmap);
+
+       /* block and leaf/node directories live in extents or a btree */
+       longform = dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS ||
+                  dip->di_core.di_format == XFS_DINODE_FMT_BTREE;
+
+       if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) &&
+           dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) {
+               dot = dotdot = 1;
+               res = process_sf_dir2(mp, ino, dip, ino_discovery, dino_dirty,
+                       dirname, parent, &repair);
+       } else if (last == mp->m_dirblkfsbs && longform) {
+               res = process_block_dir2(mp, ino, dip, ino_discovery,
+                       dino_dirty, dirname, parent, blkmap, &dot, &dotdot,
+                       &repair);
+       } else if (last >= mp->m_dirleafblk + mp->m_dirblkfsbs && longform) {
+               /* anything mapped past the first leaf block means node format */
+               res = process_leaf_node_dir2(mp, ino, dip, ino_discovery,
+                       dirname, parent, blkmap, &dot, &dotdot, &repair,
+                       last > mp->m_dirleafblk + mp->m_dirblkfsbs);
+       } else {
+               do_warn("bad size/format for directory %llu\n", ino);
+               return 1;
+       }
+
+       /*
+        * bad . entries in all directories will be fixed up in phase 6
+        */
+       if (dot == 0)
+               do_warn("no . entry for directory %llu\n", ino);
+
+       /*
+        * shortform dirs always have a .. entry.  .. for all longform
+        * directories will get fixed in phase 6. .. for other shortform
+        * dirs also get fixed there.  .. for a shortform root was
+        * fixed in place since we know what it should be
+        */
+       is_root = (ino == mp->m_sb.sb_rootino);
+       if (dotdot == 0) {
+               if (!is_root) {
+                       do_warn("no .. entry for directory %llu\n", ino);
+               } else {
+                       do_warn("no .. entry for root directory %llu\n", ino);
+                       need_root_dotdot = 1;
+               }
+       }
+
+       /*
+        * A non-root directory may not be its own parent; the root must
+        * either be its own parent or be flagged as needing a .. entry.
+        */
+       ASSERT((ino != mp->m_sb.sb_rootino && ino != *parent) ||
+               (ino == mp->m_sb.sb_rootino &&
+                       (ino == *parent || need_root_dotdot == 1)));
+
+       return res;
+}
diff --git a/repair/dir2.h b/repair/dir2.h
new file mode 100644 (file)
index 0000000..9583447
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_DIR2_H
+#define        _XR_DIR2_H
+
+struct blkmap;
+struct bmap_ext;
+
+/*
+ * the cursor gets passed up and down the da btree processing
+ * routines.  The interior block processing routines use the
+ * cursor to determine if the pointers to and from the preceding
+ * and succeeding sibling blocks are ok and whether the values in
+ * the current block are consistent with the entries in the parent
+ * nodes.  When a block is traversed, a parent-verification routine
+ * is called to verify if the next logical entry in the next level up
+ * is consistent with the greatest hashval in the next block of the
+ * current level.  The verification routine is itself recursive and
+ * calls itself if it has to traverse an interior block to get
+ * the next logical entry.  The routine recurses upwards through
+ * the tree until it finds a block where it can simply step to
+ * the next entry.  The hashval in that entry should be equal to
+ * the hashval being passed to it (the greatest hashval in the block
+ * that the entry points to).  If that isn't true, then the tree
+ * is blown and we need to trash it, salvage and trash it, or fix it.
+ * Currently, we just trash it.
+ */
+/*
+ * State kept for one level of the btree while it is being walked.
+ * The leaf-level walk in dir2.c records its current block in level[0].
+ */
+typedef struct dir2_level_state  {
+       xfs_dabuf_t     *bp;            /* block bp */
+       xfs_dablk_t     bno;            /* file block number */
+       xfs_dahash_t    hashval;        /* last verified hashval */
+       int             index;          /* current index in block */
+       int             dirty;          /* is buffer dirty ? (1 == yes) */
+} dir2_level_state_t;
+
+/*
+ * The btree cursor itself: the directory being checked plus one
+ * dir2_level_state_t per level of the tree.
+ */
+typedef struct dir2_bt_cursor  {
+       int                     active; /* highest level in tree (# levels-1) */
+       int                     type;   /* 0 if dir, 1 if attr */
+       xfs_ino_t               ino;    /* inode number of the directory */
+       xfs_dablk_t             greatest_bno;
+       xfs_dinode_t            *dip;   /* dinode handed in by the caller */
+       dir2_level_state_t      level[XFS_DA_NODE_MAXDEPTH]; /* [0] is the leaf level */
+       struct blkmap           *blkmap; /* used to map the directory's blocks */
+} dir2_bt_cursor_t;
+
+
+/* ROUTINES */
+
+/*
+ * Called on the error paths of the btree walk to release the buffers
+ * the cursor holds for interior btree blocks.
+ */
+void
+err_release_dir2_cursor(
+       xfs_mount_t             *mp,
+       dir2_bt_cursor_t        *cursor,
+       int                     prev_level);
+
+/*
+ * Read the directory block described by the nex extents in bmp.
+ * Callers treat a NULL return as a read failure.
+ */
+xfs_dabuf_t *
+da_read_buf(
+       xfs_mount_t     *mp,
+       int             nex,
+       struct bmap_ext *bmp);
+
+/*
+ * Callers use this to dispose of a buffer that was modified
+ * (presumably writing it out -- confirm against the definition).
+ */
+int
+da_bwrite(
+       xfs_mount_t     *mp,
+       xfs_dabuf_t     *bp);
+
+/*
+ * Callers use this to dispose of a buffer that does not need to be
+ * written back.
+ */
+void
+da_brelse(
+       xfs_dabuf_t     *bp);
+
+/*
+ * Check a version 2 directory inode.  Returns 1 if things are bad
+ * (directory needs to be junked) and 0 if things are ok.
+ */
+int
+process_dir2(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_dinode_t    *dip,
+       int             ino_discovery,
+       int             *dirty,
+       char            *dirname,
+       xfs_ino_t       *parent,
+       struct blkmap   *blkmap);
+
+void
+process_sf_dir2_fixi8(
+       xfs_dir2_sf_t           *sfp,
+       xfs_dir2_sf_entry_t     **next_sfep);
+
+/*
+ * Record ino on the list of bad directory inodes; dir2.c does this
+ * when the leaf/node space of a node directory fails its check.
+ */
+void
+dir2_add_badlist(
+       xfs_ino_t       ino);
+
+/*
+ * Non-zero if ino is on the bad directory inode list
+ * (presumably the list dir2_add_badlist() fills -- confirm).
+ */
+int
+dir2_is_badino(
+       xfs_ino_t       ino);
+
+#endif /* _XR_DIR2_H */
diff --git a/repair/dir_stack.c b/repair/dir_stack.c
new file mode 100644 (file)
index 0000000..1d0aae4
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "dir_stack.h"
+#include "err_protos.h"
+
+/*
+ * a directory stack for holding directories while
+ * we traverse filesystem hierarchy subtrees.
+ * names are kind of misleading as this is really
+ * implemented as an inode stack.  so sue me...
+ *
+ * stack elements are recycled rather than freed: pop_dir()
+ * parks each element it removes on dirstack_freelist and
+ * push_dir() takes elements from there before calling malloc().
+ */
+
+static dir_stack_t     dirstack_freelist;      /* spare elements for reuse */
+static int             dirstack_init = 0;      /* 1 once the freelist is set up */
+
+/*
+ * Reset a stack to empty.  The first call also sets up the
+ * shared free list.
+ */
+void
+dir_stack_init(dir_stack_t *stack)
+{
+       if (!dirstack_init)  {
+               dirstack_init = 1;
+               dirstack_freelist.head = NULL;
+               dirstack_freelist.cnt = 0;
+       }
+
+       stack->head = NULL;
+       stack->cnt = 0;
+}
+
+/*
+ * Link elem in as the new top of the stack.
+ */
+static void
+dir_stack_push(dir_stack_t *stack, dir_stack_elem_t *elem)
+{
+       /* an empty stack must not have a head */
+       ASSERT(stack->cnt > 0 || (stack->cnt == 0 && stack->head == NULL));
+
+       stack->cnt++;
+       elem->next = stack->head;
+       stack->head = elem;
+}
+
+/*
+ * Unlink and return the top element of the stack, or NULL
+ * if the stack is empty.
+ */
+static dir_stack_elem_t *
+dir_stack_pop(dir_stack_t *stack)
+{
+       dir_stack_elem_t *top;
+
+       if (stack->cnt == 0)  {
+               ASSERT(stack->head == NULL);
+               return NULL;
+       }
+
+       top = stack->head;
+       ASSERT(top != NULL);
+
+       stack->cnt--;
+       stack->head = top->next;
+       top->next = NULL;
+
+       return top;
+}
+
+/*
+ * Push an inode number onto the stack, reusing an element from
+ * the free list when one is available and allocating one otherwise.
+ */
+void
+push_dir(dir_stack_t *stack, xfs_ino_t ino)
+{
+       dir_stack_elem_t *elem;
+
+       if (dirstack_freelist.cnt != 0)  {
+               elem = dir_stack_pop(&dirstack_freelist);
+       } else  {
+               elem = malloc(sizeof(*elem));
+               if (elem == NULL)  {
+                       do_error(
+                       "couldn't malloc dir stack element, try more swap\n");
+                       exit(1);
+               }
+       }
+
+       elem->ino = ino;
+       dir_stack_push(stack, elem);
+}
+
+/*
+ * Pop the most recently pushed inode number, or return NULLFSINO
+ * if the stack is empty.  The element goes back on the free list.
+ */
+xfs_ino_t
+pop_dir(dir_stack_t *stack)
+{
+       dir_stack_elem_t *elem = dir_stack_pop(stack);
+       xfs_ino_t ino;
+
+       if (elem == NULL)
+               return NULLFSINO;
+
+       ino = elem->ino;
+
+       /* scrub the element before parking it for reuse */
+       elem->ino = NULLFSINO;
+       dir_stack_push(&dirstack_freelist, elem);
+
+       return ino;
+}
diff --git a/repair/dir_stack.h b/repair/dir_stack.h
new file mode 100644 (file)
index 0000000..9a8305b
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_DIR_STACK_H
+#define        _XR_DIR_STACK_H
+
+/*
+ * A stack of inode numbers.  xfs_ino_t must already be declared by
+ * whoever includes this header (dir_stack.c includes <libxfs.h>
+ * first).  The guard keeps a second inclusion from redefining the
+ * typedefs.
+ */
+
+/* one stack entry: an inode number and the link to the entry below it */
+typedef struct dir_stack_elem  {
+       xfs_ino_t               ino;
+       struct dir_stack_elem   *next;
+} dir_stack_elem_t;
+
+/* the stack itself: number of entries and the top entry (NULL if empty) */
+typedef struct dir_stack  {
+       int                     cnt;
+       dir_stack_elem_t        *head;
+} dir_stack_t;
+
+
+/* reset a stack to empty */
+void           dir_stack_init(dir_stack_t *stack);
+
+/* push an inode number; pop returns NULLFSINO when the stack is empty */
+void           push_dir(dir_stack_t *stack, xfs_ino_t ino);
+xfs_ino_t      pop_dir(dir_stack_t *stack);
+
+#endif /* _XR_DIR_STACK_H */
diff --git a/repair/err_protos.h b/repair/err_protos.h
new file mode 100644 (file)
index 0000000..7d5aa5c
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Message reporting routines.  Each takes a printf-style format
+ * string followed by its arguments.
+ */
+void   do_abort(char const *, ...);            /* abort, internal error */
+void   do_error(char const *, ...);            /* abort, system error */
+void   do_warn(char const *, ...);             /* issue warning */
+void   do_log(char const *, ...);              /* issue log message */
diff --git a/repair/globals.c b/repair/globals.c
new file mode 100644 (file)
index 0000000..206d084
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+
+/*
+ * globals.h defines EXTERN as "extern" only when it is not already
+ * defined, so defining it as empty before the include strips that
+ * keyword in this file.  Presumably that makes this the one place
+ * where the globals are defined rather than just declared -- confirm
+ * against the declarations in globals.h.
+ */
+#define EXTERN
+#include "globals.h"
+
diff --git a/repair/globals.h b/repair/globals.h
new file mode 100644 (file)
index 0000000..5c33d5f
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XFS_REPAIR_GLOBAL_H
+#define _XFS_REPAIR_GLOBAL_H
+
+#ifndef EXTERN
+#define EXTERN extern
+#endif
+
+/* useful macros */
+
+#define rounddown(x, y) (((x)/(y))*(y))
+
+/* error flags */
+
+#define XR_OK                  0       /* good */
+#define XR_BAD_MAGIC           1       /* bad magic number */
+#define XR_BAD_BLOCKSIZE       2       /* bad block size */
+#define XR_BAD_BLOCKLOG                3       /* bad sb_blocklog field */
+#define XR_BAD_VERSION         4       /* bad version number */
+#define XR_BAD_INPROGRESS      5       /* in progress set */
+#define XR_BAD_FS_SIZE_DATA    6       /* ag sizes, number, fs size mismatch */
+#define XR_BAD_INO_SIZE_DATA   7       /* bad inode size or perblock fields */
+#define XR_BAD_SECT_SIZE_DATA  8       /* bad sector size info */
+#define XR_AGF_GEO_MISMATCH    9       /* agf info conflicts with sb */
+#define XR_AGI_GEO_MISMATCH    10      /* agi info conflicts with sb */
+#define XR_SB_GEO_MISMATCH     11      /* sb geo conflicts with fs sb geo */
+#define XR_EOF                 12      /* seeked beyond EOF */
+#define XR_BAD_RT_GEO_DATA     13      /* realtime geometry inconsistent */
+#define XR_BAD_INO_MAX_PCT     14      /* max % of inodes > 100% */
+#define XR_BAD_INO_ALIGN       15      /* bad inode alignment value */
+#define XR_INSUFF_SEC_SB       16      /* not enough matching secondary sbs */
+#define XR_BAD_SB_UNIT         17      /* bad stripe unit */
+#define XR_BAD_SB_WIDTH                18      /* bad stripe width */
+#define XR_BAD_SVN             19      /* bad shared version number */
+#define XR_BAD_ERR_CODE                20      /* Bad error code */
+
+/* XFS filesystem (il)legal values */
+
+#define XR_LOG2BSIZE_MIN       9       /* min/max fs blocksize (log2) */
+#define XR_LOG2BSIZE_MAX       16      /* 2^XR_* == blocksize */
+
+#define        NUM_SBS                 8       /* max # of sbs to verify */
+#define NUM_AGH_SECTS          4       /* # of components in an ag header */
+
+#define        MEM_ALIGN               128     /* I/O buf alignment - a cache line */
+
+/*
+ * secondary sb mask -- if the secondary sb feature bits have
+ * the partial sb mask bit set, then you can depend on the fields
+ * in it up to and including sb_inoalignmt but the unused part of the
+ * sector may have trash in it.  If the sb has any bits set that are in
+ * the good mask, then the entire sb and sector are good (was bzero'ed
+ * by mkfs).  The third mask is for filesystems made by pre-6.5 campus
+ * alpha mkfs's.  Those are rare so we'll check for those under
+ * a special option.
+ */
+#define XR_PART_SECSB_VNMASK   0x0F80  /* >= XFS_SB_VERSION_ALIGNBIT */
+#define XR_GOOD_SECSB_VNMASK   0x0F00  /* >= XFS_SB_VERSION_DALIGNBIT */
+#define XR_ALPHA_SECSB_VNMASK  0x0180  /* DALIGN|ALIGN bits */
+
+/* global variables for xfs_repair */
+
+/* arguments and argument flag variables */
+
+EXTERN char    *fs_name;               /* name of filesystem */
+EXTERN int     verbose;                /* verbose flag, mostly for debugging */
+
+
+/* for reading stuff in manually (bypassing libsim) */
+
+EXTERN char    *iobuf;                 /* large buffer */
+EXTERN int     iobuf_size;
+EXTERN char    *smallbuf;              /* small (1-4 page) buffer */
+EXTERN int     smallbuf_size;
+EXTERN char    *sb_bufs[NUM_SBS];      /* superblock buffers */
+EXTERN int     sbbuf_size;
+
+/* direct I/O info */
+
+EXTERN int     minio_align;            /* min I/O size and alignment */
+EXTERN int     mem_align;              /* memory alignment */
+EXTERN int     max_iosize;             /* max I/O size */
+
+/* file descriptors */
+
+EXTERN int     fs_fd;                  /* filesystem fd */
+
+/* command-line flags (verbose is declared with the argument variables above) */
+
+EXTERN int     no_modify;
+EXTERN int     isa_file;
+EXTERN int     dumpcore;               /* abort, not exit on fatal errs */
+EXTERN int     delete_attr_ok;         /* can clear attrs w/o clearing files */
+EXTERN int     force_geo;              /* can set geo on low confidence info */
+EXTERN int     assume_xfs;             /* assume we have an xfs fs */
+EXTERN int     pre_65_beta;            /* fs was mkfs'ed by a version earlier */
+                                       /* than 6.5-beta */
+EXTERN char *log_name;                 /* Name of log device */
+EXTERN int log_spec;                   /* Log dev specified as option */
+
+/* misc status variables */
+
+EXTERN int             primary_sb_modified;
+EXTERN int             bad_ino_btree;
+EXTERN int             clear_sunit;
+EXTERN int             fs_is_dirty;
+
+/* for hunting down the root inode */
+
+EXTERN int             need_root_inode;
+EXTERN int             need_root_dotdot;
+
+EXTERN int             need_rbmino;
+EXTERN int             need_rsumino;
+
+EXTERN int             lost_quotas;
+EXTERN int             have_uquotino;
+EXTERN int             have_pquotino;
+EXTERN int             lost_uquotino;
+EXTERN int             lost_pquotino;
+
+EXTERN xfs_agino_t     first_prealloc_ino;
+EXTERN xfs_agino_t     last_prealloc_ino;
+EXTERN xfs_agblock_t   bnobt_root;
+EXTERN xfs_agblock_t   bcntbt_root;
+EXTERN xfs_agblock_t   inobt_root;
+
+/* configuration vars -- fs geometry dependent */
+
+EXTERN int             inodes_per_block;
+EXTERN int             inodes_per_cluster;     /* inodes per inode buffer */
+EXTERN unsigned int    glob_agcount;
+EXTERN int             chunks_pblock;  /* # of 64-ino chunks per allocation */
+EXTERN int             max_symlink_blocks;
+EXTERN __int64_t       fs_max_file_offset;
+
+/* block allocation bitmaps */
+
+EXTERN __uint64_t      **ba_bmap;      /* see incore.h */
+EXTERN __uint64_t      *rt_ba_bmap;    /* see incore.h */
+
+/* realtime info */
+
+EXTERN xfs_rtword_t    *btmcompute;
+EXTERN xfs_suminfo_t   *sumcompute;
+
+/* inode tree records have full or partial backptr fields ? */
+
+EXTERN int             full_backptrs;  /*
+                                        * if 1, use backptrs_t component
+                                        * of ino_un union, if 0, use
+                                        * parent_list_t component.  see
+                                        * incore.h for more details
+                                        */
+
+#define ORPHANAGE      "lost+found"
+
+/* superblock counters */
+
+EXTERN __uint64_t      sb_icount;      /* allocated (made) inodes */
+EXTERN __uint64_t      sb_ifree;       /* free inodes */
+EXTERN __uint64_t      sb_fdblocks;    /* free data blocks */
+EXTERN __uint64_t      sb_frextents;   /* free realtime extents */
+
+EXTERN xfs_ino_t       orphanage_ino;
+EXTERN xfs_ino_t       old_orphanage_ino;
+
+/* superblock geometry info */
+
+EXTERN xfs_extlen_t    sb_inoalignmt;
+EXTERN __uint32_t      sb_unit;
+EXTERN __uint32_t      sb_width;
+
+#endif /* _XFS_REPAIR_GLOBAL_H */
diff --git a/repair/incore.c b/repair/incore.c
new file mode 100644 (file)
index 0000000..4998541
--- /dev/null
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * push a block allocation record onto list.  assumes list
+ * is set to NULL if empty.
+ *
+ * NOTE(review): list is passed by value, so the "list = addr"
+ * assignment below only changes this function's local copy.  The
+ * caller's list head is not updated; the only effect visible to the
+ * caller is that addr->next now points at the old head.  Updating the
+ * caller's head would need a ba_rec_t ** parameter or a return value.
+ */
+void
+record_allocation(ba_rec_t *addr, ba_rec_t *list)
+{
+       addr->next = list;
+       list = addr;            /* local copy only -- see NOTE above */
+
+       return;
+}
+
+/*
+ * free every record on a block allocation list.  only the ba_rec_t
+ * nodes themselves are freed; the addr field of each record is not
+ * touched.  a NULL list is a no-op.
+ */
+void
+free_allocations(ba_rec_t *list)
+{
+       ba_rec_t *next;
+
+       for (; list != NULL; list = next)  {
+               /* grab the link before the node goes away */
+               next = list->next;
+               free(list);
+       }
+}
+
+/*
+ * ba bmap setup stuff.  setting/getting state is in incore.h
+ *
+ * allocates the per-AG block state maps (ba_bmap) and, if rtblocks
+ * is non-zero, the realtime map (rt_ba_bmap).
+ *
+ *   agno      - number of AGs to allocate maps for (a count, not an
+ *               AG index -- incore_init() passes sb_agcount)
+ *   numblocks - number of blocks in each AG
+ *   rtblocks  - number of realtime records to track, 0 for none
+ *
+ * the per-AG maps are zeroed, i.e. every record starts out as
+ * XR_E_UNKNOWN; the realtime map is initialized by set_bmap_rt().
+ * allocation failures are reported through do_error().
+ */
+void
+setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
+{
+       xfs_agnumber_t  i;
+       size_t          ag_size;
+       xfs_drfsbno_t   rt_size;
+
+       ba_bmap = (__uint64_t **)malloc(agno * sizeof(__uint64_t *));
+       if (!ba_bmap)  {
+               do_error("couldn't allocate block map pointers\n");
+               return;
+       }
+
+       /*
+        * every AG map is the same size, so work it out once.  the
+        * arithmetic is done in size_t so that a large AG can neither
+        * wrap the 32-bit block count nor produce a negative int.
+        */
+       ag_size = roundup((size_t)numblocks * (NBBY/XR_BB), sizeof(__uint64_t));
+
+       for (i = 0; i < agno; i++)  {
+               ba_bmap[i] = (__uint64_t *)memalign(sizeof(__uint64_t),
+                                                       ag_size);
+               if (!ba_bmap[i])  {
+                       do_error("couldn't allocate block map, size = %u\n",
+                               (unsigned int)numblocks);
+                       return;
+               }
+               bzero(ba_bmap[i], ag_size);
+       }
+
+       if (rtblocks == 0)  {
+               rt_ba_bmap = NULL;
+               return;
+       }
+
+       rt_size = roundup(rtblocks * (NBBY/XR_BB), sizeof(__uint64_t));
+
+       rt_ba_bmap = (__uint64_t *)memalign(sizeof(__uint64_t), rt_size);
+       if (!rt_ba_bmap)  {
+               do_error(
+               "couldn't allocate real-time block map, size = %llu\n",
+                       (unsigned long long)rtblocks);
+               return;
+       }
+
+       /*
+        * start all real-time as free blocks
+        */
+       set_bmap_rt(rtblocks);
+
+       return;
+}
+
+/*
+ * release the realtime block map, if there is one, and reset the
+ * global pointer.  mp is unused.
+ */
+/* ARGSUSED */
+void
+teardown_rt_bmap(xfs_mount_t *mp)
+{
+       /* free(NULL) is a no-op, so no need to test first */
+       free(rt_ba_bmap);
+       rt_ba_bmap = NULL;
+}
+
+/*
+ * release the block map of a single AG and clear its slot in
+ * ba_bmap.  the ASSERT expects the map to still be allocated, so
+ * this should be called once per AG.  mp is unused.
+ */
+/* ARGSUSED */
+void
+teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       ASSERT(ba_bmap[agno] != NULL);
+
+       free(ba_bmap[agno]);
+       ba_bmap[agno] = NULL;
+
+       return;
+}
+
+/*
+ * release the array of per-AG map pointers itself.  the individual
+ * AG maps are not freed here.  mp is unused.
+ */
+/* ARGSUSED */
+void
+teardown_bmap_finish(xfs_mount_t *mp)
+{
+       free(ba_bmap);
+       ba_bmap = NULL;
+}
+
+/*
+ * tear down all block maps: each AG map (sb_agcount of them), then
+ * the realtime map, then the array of AG map pointers.
+ */
+void
+teardown_bmap(xfs_mount_t *mp)
+{
+       xfs_agnumber_t agno;
+
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+               teardown_ag_bmap(mp, agno);
+
+       teardown_rt_bmap(mp);
+       teardown_bmap_finish(mp);
+}
+
+/*
+ * block map initialization routines -- realtime, log, fs
+ */
+
+/*
+ * initialize the realtime map so that all num records are free.
+ * each 4-bit record is set to 2 (XR_E_FREE), which is where the
+ * repeating 0x2 nibble pattern comes from.
+ */
+void
+set_bmap_rt(xfs_drtbno_t num)
+{
+       xfs_drtbno_t    nwords;
+       xfs_drtbno_t    w;
+
+       /* number of 64-bit words to fill */
+       nwords = howmany(num * (NBBY/XR_BB), sizeof(__uint64_t));
+
+       for (w = 0; w < nwords; w++)  {
+               rt_ba_bmap[w] = 0x2222222222222222LL;
+       }
+}
+
+/*
+ * mark the blocks of the log (sb_logblocks blocks starting at
+ * sb_logstart) as XR_E_INUSE_FS.  nothing is marked when
+ * sb_logstart is 0.
+ */
+void
+set_bmap_log(xfs_mount_t *mp)
+{
+       xfs_dfsbno_t    bno;
+       xfs_dfsbno_t    log_end;
+
+       if (mp->m_sb.sb_logstart != 0)  {
+               log_end = mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks;
+
+               for (bno = mp->m_sb.sb_logstart; bno < log_end; bno++)  {
+                       set_fsbno_state(mp, bno, XR_E_INUSE_FS);
+               }
+       }
+}
+
+/*
+ * mark the blocks at the start of every AG that hold the AG header
+ * as XR_E_INUSE_FS.  the header is NUM_AGH_SECTS (4) sectors,
+ * rounded up to a whole number of blocks.
+ */
+void
+set_bmap_fs(xfs_mount_t *mp)
+{
+       xfs_agnumber_t  agno;
+       xfs_agblock_t   agbno;
+       xfs_agblock_t   hdr_blocks;
+
+       hdr_blocks = howmany(NUM_AGH_SECTS * mp->m_sb.sb_sectsize,
+                               mp->m_sb.sb_blocksize);
+
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+               for (agbno = 0; agbno < hdr_blocks; agbno++)  {
+                       set_agbno_state(mp, agno, agbno, XR_E_INUSE_FS);
+               }
+       }
+}
+
+#if 0
+/*
+ * NOTE: compiled out by the enclosing #if 0.
+ *
+ * in every AG, mark agblocks bnobt_root through inobt_root
+ * (inclusive) as XR_E_INUSE_FS.
+ */
+void
+set_bmap_fs_bt(xfs_mount_t *mp)
+{
+       xfs_agnumber_t  i;
+       xfs_agblock_t   j;
+       xfs_agblock_t   begin;
+       xfs_agblock_t   end;
+
+       begin = bnobt_root;
+       end = inobt_root + 1;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /*
+                * account for btree roots
+                */
+               for (j = begin; j < end; j++)
+                       set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+       }
+
+       return;
+}
+#endif
+
+/*
+ * set up the in-core state for a filesystem: allocate the block
+ * maps from the superblock geometry, call incore_ino_init() and
+ * incore_ext_init(), then fill in the geometry-dependent globals.
+ */
+void
+incore_init(xfs_mount_t *mp)
+{
+       extern void incore_ino_init(xfs_mount_t *);
+       extern void incore_ext_init(xfs_mount_t *);
+       int nags = mp->m_sb.sb_agcount;
+
+       /* block allocation maps: one per AG plus the realtime map */
+       setup_bmap(nags, mp->m_sb.sb_agblocks, mp->m_sb.sb_rextents);
+
+       incore_ino_init(mp);
+       incore_ext_init(mp);
+
+       /* globals that can only be set once the fs geometry is known */
+       inodes_per_block = mp->m_sb.sb_inopblock;
+}
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/*
+ * function version of the get_agbno_state() macro, built when
+ * XR_BMAP_TRACE or XR_BMAP_DBG is defined.  returns the 4-bit state
+ * record of block ag_blockno in AG agno.  mp is unused.
+ */
+int
+get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+               xfs_agblock_t ag_blockno)
+{
+       __uint64_t      *word = ba_bmap[agno] + ag_blockno / XR_BB_NUM;
+       int             shift = (ag_blockno % XR_BB_NUM) * XR_BB;
+
+       return (*word >> shift) & XR_BB_MASK;
+}
+
+/*
+ * function version of the set_agbno_state() macro.  replaces the
+ * 4-bit state record of block ag_blockno in AG agno with state.
+ * state is not masked before being shifted in.  mp is unused.
+ */
+void
+set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno, int state)
+{
+       __uint64_t      *word = ba_bmap[agno] + ag_blockno / XR_BB_NUM;
+       int             shift = (ag_blockno % XR_BB_NUM) * XR_BB;
+       __uint64_t      others;
+
+       /* everything in the word except the record being replaced */
+       others = *word & ~((__uint64_t) XR_BB_MASK << shift);
+
+       *word = others | ((__uint64_t) state << shift);
+}
+
+/*
+ * look up the state of a block given its filesystem block number:
+ * split it into AG number and AG block and ask get_agbno_state().
+ */
+int
+get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno)
+{
+       xfs_agnumber_t  agno = XFS_FSB_TO_AGNO(mp, blockno);
+       xfs_agblock_t   agbno = XFS_FSB_TO_AGBNO(mp, blockno);
+
+       return get_agbno_state(mp, agno, agbno);
+}
+
+/*
+ * set the state of a block given its filesystem block number:
+ * split it into AG number and AG block and hand off to
+ * set_agbno_state().
+ */
+void
+set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state)
+{
+       xfs_agnumber_t  agno = XFS_FSB_TO_AGNO(mp, blockno);
+       xfs_agblock_t   agbno = XFS_FSB_TO_AGBNO(mp, blockno);
+
+       set_agbno_state(mp, agno, agbno, state);
+}
+#endif
diff --git a/repair/incore.h b/repair/incore.h
new file mode 100644 (file)
index 0000000..22ffdea
--- /dev/null
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * contains definition information.  implementation (code)
+ * is spread out in separate files.
+ */
+
+/*
+ * block allocation lists
+ */
+typedef struct ba_rec  {
+       void            *addr;
+       struct ba_rec   *next;
+} ba_rec_t;
+
+void                   record_allocation(ba_rec_t *addr, ba_rec_t *list);
+void                   free_allocations(ba_rec_t *list);
+
+/*
+ * block bit map defs -- track state of each filesystem block.
+ * ba_bmap is an array of bitstrings declared in the globals.h file.
+ * the bitstrings are broken up into 64-bit chunks.  one bitstring per AG.
+ */
+#define BA_BMAP_SIZE(x)                (howmany(x, 4))
+
+void                   set_bmap_rt(xfs_drfsbno_t numblocks);
+void                   set_bmap_log(xfs_mount_t *mp);
+void                   set_bmap_fs(xfs_mount_t *mp);
+void                   teardown_bmap(xfs_mount_t *mp);
+
+void                   teardown_rt_bmap(xfs_mount_t *mp);
+void                   teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno);
+void                   teardown_bmap_finish(xfs_mount_t *mp);
+
+/* blocks are numbered from zero */
+
+/* block records fit into __uint64_t's units */
+
+#define XR_BB_UNIT     64                      /* number of bits/unit */
+#define XR_BB          4                       /* bits per block record */
+#define XR_BB_NUM      (XR_BB_UNIT/XR_BB)      /* number of records per unit */
+#define XR_BB_MASK     0xF                     /* block record mask */
+
+/*
+ * bitstring ops -- set/get block states, either in filesystem
+ * bno's or in agbno's.  turns out that fsbno addressing is
+ * more convenient when dealing with bmap extracted addresses
+ * and agbno addressing is more convenient when dealing with
+ * meta-data extracted addresses.  So the fsbno versions use
+ * mtype (which can be one of the block map types above) to
+ * set the correct block map while the agbno versions assume
+ * you want to use the regular block map.
+ */
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/*
+ * implemented as functions for debugging purposes
+ */
+int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno);
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno, int state);
+
+int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno);
+void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state);
+#else
+/*
+ * implemented as macros for performance purposes
+ */
+
+#define get_agbno_state(mp, agno, ag_blockno) \
+                       ((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \
+                                >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
+                               & XR_BB_MASK)
+#define set_agbno_state(mp, agno, ag_blockno, state) \
+       *(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
+               ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
+         (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
+        (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)))
+
+#define get_fsbno_state(mp, blockno) \
+               get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+                               XFS_FSB_TO_AGBNO(mp, (blockno)))
+#define set_fsbno_state(mp, blockno, state) \
+               set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+                       XFS_FSB_TO_AGBNO(mp, (blockno)), (state))
+
+
+#define get_agbno_rec(mp, agno, ag_blockno) \
+                       (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM))
+#endif /* XR_BMAP_TRACE */
+
+/*
+ * these work in real-time extents (e.g. fsbno == rt extent number)
+ */
+#define get_rtbno_state(mp, fsbno) \
+                       ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
+                       (((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)
+#define set_rtbno_state(mp, fsbno, state) \
+       *(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
+        ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
+         (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
+        (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB)))
+
+
+/*
+ * extent tree definitions
+ * right now, there are 3 trees per AG, a bno tree, a bcnt tree
+ * and a tree for dup extents.  If the code is modified in the
+ * future to use an extent tree instead of a bitmask for tracking
+ * fs blocks, then we could lose the dup extent tree if we labelled
+ * each extent with the inode that owned it.
+ */
+
+typedef unsigned char extent_state_t;
+
+typedef struct extent_tree_node  {
+       avlnode_t               avl_node;
+       xfs_agblock_t           ex_startblock;  /* starting block (agbno) */
+       xfs_extlen_t            ex_blockcount;  /* number of blocks in extent */
+       extent_state_t          ex_state;       /* see state flags below */
+
+       struct extent_tree_node         *next;  /* for bcnt extent lists */
+#if 0
+       xfs_ino_t               ex_inode;       /* owner, NULL if free or  */
+                                               /*      multiply allocated */
+#endif
+} extent_tree_node_t;
+
+typedef struct rt_extent_tree_node  {
+       avlnode_t               avl_node;
+       xfs_drtbno_t            rt_startblock;  /* starting realtime block */
+       xfs_extlen_t            rt_blockcount;  /* number of blocks in extent */
+       extent_state_t          rt_state;       /* see state flags below */
+
+#if 0
+       xfs_ino_t               ex_inode;       /* owner, NULL if free or  */
+                                               /*      multiply allocated */
+#endif
+} rt_extent_tree_node_t;
+
+/* extent states, prefix with XR_ to avoid conflict with buffer cache defines */
+
+#define XR_E_UNKNOWN   0       /* unknown state */
+#define XR_E_FREE1     1       /* free block (marked by one fs space tree) */
+#define XR_E_FREE      2       /* free block (marked by both fs space trees) */
+#define XR_E_INUSE     3       /* extent used by file/dir data or metadata */
+#define XR_E_INUSE_FS  4       /* extent used by fs ag header or log */
+#define XR_E_MULT      5       /* extent is multiply referenced */
+#define XR_E_INO       6       /* extent used by inodes (inode blocks) */
+#define XR_E_FS_MAP    7       /* extent used by fs space/inode maps */
+#define XR_E_BAD_STATE 8
+
+/* separate state bit, OR'ed into high (4th) bit of ex_state field */
+
+#define XR_E_WRITTEN   0x8     /* extent has been written out, can't reclaim */
+
+/*
+ * good_state  -- true if state, ignoring the written bit, lies in
+ *                [XR_E_UNKNOWN, XR_E_BAD_STATE)
+ * written     -- non-zero if the written bit is set in state
+ * set_written -- set the written bit in state (state must be an lvalue),
+ *                leaving the other bits alone
+ */
+#define good_state(state)      (((state) & (~XR_E_WRITTEN)) >= XR_E_UNKNOWN && \
+                               ((state) & (~XR_E_WRITTEN)) < XR_E_BAD_STATE)
+#define written(state)         ((state) & XR_E_WRITTEN)
+#define set_written(state)     ((state) |= XR_E_WRITTEN)
+
+/*
+ * bno extent tree functions
+ */
+void
+add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount);
+
+extent_tree_node_t *
+findfirst_bno_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t agbno);
+
+/* takes an "extent_tree_node_t *", follows its avl_nextino link */
+#define findnext_bno_extent(extent_ptr) \
+               ((extent_tree_node_t *) ((extent_ptr)->avl_node.avl_nextino))
+
+void
+get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext);
+
+/*
+ * bcnt tree functions
+ */
+void
+add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount);
+
+extent_tree_node_t *
+findfirst_bcnt_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+find_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t agbno);
+
+extent_tree_node_t *
+findbiggest_bcnt_extent(xfs_agnumber_t agno);
+
+extent_tree_node_t *
+findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext);
+
+extent_tree_node_t *
+get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount);
+
+/*
+ * duplicate extent tree functions
+ */
+void           add_dup_extent(xfs_agnumber_t agno,
+                               xfs_agblock_t startblock,
+                               xfs_extlen_t blockcount);
+
+int            search_dup_extent(xfs_mount_t *mp,
+                               xfs_agnumber_t agno,
+                               xfs_agblock_t agbno);
+
+void           add_rt_dup_extent(xfs_drtbno_t  startblock,
+                               xfs_extlen_t    blockcount);
+
+int            search_rt_dup_extent(xfs_mount_t        *mp,
+                                       xfs_drtbno_t    bno);
+
+/*
+ * extent/tree recycling and deletion routines
+ */
+
+/*
+ * return an extent node to the extent node free list
+ */
+void           release_extent_tree_node(extent_tree_node_t *node);
+
+/*
+ * recycle all the nodes in the per-AG tree
+ */
+void           release_dup_extent_tree(xfs_agnumber_t agno);
+void           release_agbno_extent_tree(xfs_agnumber_t agno);
+void           release_agbcnt_extent_tree(xfs_agnumber_t agno);
+
+/*
+ * realtime duplicate extent tree - this one actually frees the memory
+ */
+void           free_rt_dup_extent_tree(xfs_mount_t *mp);
+
+/*
+ * per-AG extent trees shutdown routine -- all (bno, bcnt and dup)
+ * at once.  this one actually frees the memory instead of just recycling
+ * the nodes.
+ */
+void           incore_ext_teardown(xfs_mount_t *mp);
+
+/*
+ * inode definitions
+ */
+
+/* inode types */
+
+#define XR_INO_UNKNOWN 0               /* unknown */
+#define XR_INO_DIR     1               /* directory */
+#define XR_INO_RTDATA  2               /* realtime file */
+#define XR_INO_RTBITMAP        3               /* realtime bitmap inode */
+#define XR_INO_RTSUM   4               /* realtime summary inode */
+#define XR_INO_DATA    5               /* regular file */
+#define XR_INO_SYMLINK 6               /* symlink */
+#define XR_INO_CHRDEV  7               /* character device */
+#define XR_INO_BLKDEV  8               /* block device */
+#define XR_INO_SOCK    9               /* socket */
+#define XR_INO_FIFO    10              /* fifo */
+#define XR_INO_MOUNTPOINT 11           /* mountpoint */
+
+/* inode allocation tree */
+
+/*
+ * Inodes in the inode allocation trees are allocated in chunks.
+ * Those groups can be easily duplicated in our trees.
+ * Disconnected inodes are harder.  We can do one of two
+ * things in that case:  if we know the inode allocation btrees
+ * are good, then we can disallow directory references to unknown
+ * inode chunks.  If the inode allocation trees have been trashed or
+ * we feel like being aggressive, then as we hit unknown inodes,
+ * we can search on the disk for all contiguous inodes and see if
+ * they fit into chunks.  Before putting them into the inode tree,
+ * we can scan each inode starting at the earliest inode to see which
+ * ones are good.  This protects us from the pathological case of
+ * inodes appearing in user-data.  We still may have to mark the
+ * inodes as "possibly fake" so that if a file claims the blocks,
+ * we decide to believe the inodes, especially if they're not
+ * connected.
+ */
+
+#define PLIST_CHUNK_SIZE       4
+
+typedef xfs_ino_t parent_entry_t;
+
+typedef struct parent_list  {
+       __uint64_t              pmask;
+       parent_entry_t          *pentries;
+#ifdef DEBUG
+       short                   cnt;
+#endif
+} parent_list_t;
+
+typedef struct backptrs  {
+       __uint64_t              ino_reached;    /* bit == 1 if reached */
+       __uint64_t              ino_processed;  /* reference checked bit mask */
+       __uint32_t              nlinks[XFS_INODES_PER_CHUNK];
+       parent_list_t           *parents;
+} backptrs_t;
+
+typedef struct ino_tree_node  {
+       avlnode_t               avl_node;
+       xfs_agino_t             ino_startnum;   /* starting inode # */
+       xfs_inofree_t           ir_free;        /* inode free bit mask */
+       __uint64_t              ino_confirmed;  /* confirmed bitmask */
+       __uint64_t              ino_isa_dir;    /* bit == 1 if a directory */
+       union  {
+               backptrs_t      *backptrs;
+               parent_list_t   *plist;
+       } ino_un;
+} ino_tree_node_t;
+
+#define INOS_PER_IREC          (sizeof(__uint64_t) * NBBY)
+void                           add_ino_backptrs(xfs_mount_t *mp);
+
+/*
+ * return an inode record to the free inode record pool
+ */
+void           free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec);
+
+/*
+ * get pulls the inode record from the good inode tree
+ */
+void           get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec);
+
+ino_tree_node_t *findfirst_inode_rec(xfs_agnumber_t agno);
+ino_tree_node_t *find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino);
+void           find_inode_rec_range(xfs_agnumber_t agno,
+                       xfs_agino_t start_ino, xfs_agino_t end_ino,
+                       ino_tree_node_t **first, ino_tree_node_t **last);
+
+/*
+ * set inode states -- setting an inode to used or free also
+ * automatically marks it as "existing".  Note -- all the inode
+ * add/set/get routines assume a valid inode number.
+ */
+ino_tree_node_t        *set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino);
+ino_tree_node_t        *set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino);
+
+void           print_inode_list(xfs_agnumber_t agno);
+void           print_uncertain_inode_list(xfs_agnumber_t agno);
+
+/*
+ * separate trees for uncertain inodes (they may not exist).
+ */
+ino_tree_node_t                *findfirst_uncertain_inode_rec(xfs_agnumber_t agno);
+void                   add_inode_uncertain(xfs_mount_t *mp,
+                                               xfs_ino_t ino, int free);
+void                   add_aginode_uncertain(xfs_agnumber_t agno,
+                                               xfs_agino_t agino, int free);
+void                   get_uncertain_inode_rec(xfs_agnumber_t agno,
+                                               ino_tree_node_t *ino_rec);
+void                   clear_uncertain_ino_cache(xfs_agnumber_t agno);
+
+/*
+ * return next in-order inode tree node.  takes an "ino_tree_node_t *"
+ */
+#define next_ino_rec(ino_node_ptr)     \
+               ((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_nextino))
+/*
+ * return the next linked inode (forward avl tree link)-- meant to be used
+ * by linked list routines (uncertain inode routines/records)
+ */
+#define next_link_rec(ino_node_ptr)    \
+               ((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_forw))
+
+/*
+ * Bit manipulations for processed field
+ *
+ * XFS_INOPROC_MASK(i) is a 64-bit mask with only bit i set.
+ * XFS_INOPROC_MASKN(i,n) is a 64-bit mask of n consecutive bits whose
+ * lowest bit is bit i.  Both shift a 64-bit value, so i and n must
+ * each be less than 64.
+ *
+ * The IS/SET macros take a pointer rp whose
+ * ino_un.backptrs->ino_processed field holds the bits; IS_PROC
+ * evaluates to 1 if bit i is set and to 0 otherwise.
+ */
+#define        XFS_INOPROC_MASK(i)     ((__uint64_t)1 << (i))
+/*
+ * the 1 is widened to 64 bits before it is shifted by n; shifting a
+ * plain int 1 overflows once n reaches 31.
+ */
+#define        XFS_INOPROC_MASKN(i,n)  ((((__uint64_t)1 << (n)) - 1) << (i))
+
+#define        XFS_INOPROC_IS_PROC(rp, i) \
+       (((rp)->ino_un.backptrs->ino_processed & XFS_INOPROC_MASK((i))) == 0LL \
+               ? 0 : 1)
+#define        XFS_INOPROC_SET_PROC(rp, i) \
+       ((rp)->ino_un.backptrs->ino_processed |= XFS_INOPROC_MASK((i)))
+/*
+#define        XFS_INOPROC_CLR_PROC(rp, i) \
+       ((rp)->ino_un.backptrs->ino_processed &= ~XFS_INOPROC_MASK((i)))
+*/
+
+/*
+ * same for ir_confirmed.
+ *
+ * XFS_INOCF_MASK(i) is a 64-bit mask with only bit i set and
+ * XFS_INOCF_MASKN(i,n) is a 64-bit mask of n consecutive bits whose
+ * lowest bit is bit i; i and n must each be less than 64.  The
+ * IS/SET/CLR macros operate on the ino_confirmed field of the record
+ * that rp points to; IS_CF evaluates to 1 if bit i is set, else 0.
+ */
+#define        XFS_INOCF_MASK(i)       ((__uint64_t)1 << (i))
+/*
+ * the 1 is widened to 64 bits before it is shifted by n; shifting a
+ * plain int 1 overflows once n reaches 31.
+ */
+#define        XFS_INOCF_MASKN(i,n)    ((((__uint64_t)1 << (n)) - 1) << (i))
+
+#define        XFS_INOCF_IS_CF(rp, i) \
+               (((rp)->ino_confirmed & XFS_INOCF_MASK((i))) == 0LL \
+                       ? 0 : 1)
+#define        XFS_INOCF_SET_CF(rp, i) \
+                       ((rp)->ino_confirmed |= XFS_INOCF_MASK((i)))
+#define        XFS_INOCF_CLR_CF(rp, i) \
+                       ((rp)->ino_confirmed &= ~XFS_INOCF_MASK((i)))
+
+/*
+ * bit operations on backptrs->ino_reached: one bit per inode in the
+ * record, selected by the inode's offset i.  IS_RCHD yields 1 when the
+ * bit is set and 0 when it is clear.
+ */
+#define        XFS_INO_RCHD_MASK(i)    (((__uint64_t)1) << (i))
+
+#define        XFS_INO_RCHD_IS_RCHD(rp, i) \
+       (((rp)->ino_un.backptrs->ino_reached & XFS_INO_RCHD_MASK((i))) != 0LL)
+#define        XFS_INO_RCHD_SET_RCHD(rp, i) \
+       ((rp)->ino_un.backptrs->ino_reached |= XFS_INO_RCHD_MASK((i)))
+#define        XFS_INO_RCHD_CLR_RCHD(rp, i) \
+       ((rp)->ino_un.backptrs->ino_reached &= ~(XFS_INO_RCHD_MASK((i))))
+/*
+ * directory flag: one bit per inode in the record's ino_isa_dir
+ * field, selected by ino_offset.  inode_isadir() yields 1 when the
+ * bit is set and 0 when it is clear.
+ */
+#define        XFS_INO_ISADIR_MASK(i)  (((__uint64_t)1) << (i))
+
+#define inode_isadir(ino_rec, ino_offset) \
+       (((ino_rec)->ino_isa_dir & XFS_INO_ISADIR_MASK((ino_offset))) != 0LL)
+#define set_inode_isadir(ino_rec, ino_offset) \
+       ((ino_rec)->ino_isa_dir |= XFS_INO_ISADIR_MASK((ino_offset)))
+#define clear_inode_isadir(ino_rec, ino_offset) \
+       ((ino_rec)->ino_isa_dir &= ~(XFS_INO_ISADIR_MASK((ino_offset))))
+
+
+
+/*
+ * "confirmed" flag -- the inode is known to be valid (although
+ * perhaps corrupt).  thin wrappers around the XFS_INOCF_* macros.
+ */
+#define is_inode_confirmed(ino_rec, ino_offset) \
+       XFS_INOCF_IS_CF((ino_rec), (ino_offset))
+
+#define set_inode_confirmed(ino_rec, ino_offset) \
+       XFS_INOCF_SET_CF((ino_rec), (ino_offset))
+
+#define clear_inode_confirmed(ino_rec, ino_offset) \
+       XFS_INOCF_CLR_CF((ino_rec), (ino_offset))
+
+/*
+ * set/clear/test is inode free or used
+ *
+ * set_inode_free() and set_inode_used() first mark the inode as
+ * confirmed and then set or clear its free bit through
+ * XFS_INOBT_SET_FREE/XFS_INOBT_CLR_FREE.  Every expansion is wrapped
+ * in parentheses so the comma expression and the leading "!" stay one
+ * unit wherever the macro is used inside a larger expression.
+ */
+#define set_inode_free(ino_rec, ino_offset) \
+       (XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
+       XFS_INOBT_SET_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define set_inode_used(ino_rec, ino_offset) \
+       (XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
+       XFS_INOBT_CLR_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define is_inode_used(ino_rec, ino_offset)     \
+       (!XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+#define is_inode_free(ino_rec, ino_offset)     \
+       (XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)))
+
+/*
+ * add_inode_reached() is called on inode I only if I has been reached
+ * by an inode P claiming to be the parent and, if I is a directory,
+ * the .. link in I says that P is I's parent.
+ *
+ * add_inode_ref() is called every time a link to an inode is
+ * detected and drop_inode_ref() is called every time a link to
+ * an inode that we've counted is removed.
+ */
+
+void           add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset);
+void           add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset);
+void           drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset);
+int            is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset);
+int            is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset);
+__uint32_t     num_inode_references(ino_tree_node_t *ino_rec, int ino_offset);
+
+/*
+ * has an inode been processed for phase 6 (reference count checking)?
+ * add_inode_refchecked() marks an inode once it has been traversed
+ * during the reference count phase (6), so that a directory inode is
+ * traversed (and its links counted) only once.
+ *
+ * without XR_INO_REF_DEBUG these are macros over the "processed" bits
+ * and the ino argument is unused; with it they are real functions.
+ */
+#ifdef XR_INO_REF_DEBUG
+void add_inode_refchecked(xfs_ino_t ino,
+                       ino_tree_node_t *ino_rec, int ino_offset);
+int is_inode_refchecked(xfs_ino_t ino,
+                       ino_tree_node_t *ino_rec, int ino_offset);
+#else
+#define add_inode_refchecked(ino, ino_rec, ino_offset) \
+       XFS_INOPROC_SET_PROC((ino_rec), (ino_offset))
+#define is_inode_refchecked(ino, ino_rec, ino_offset) \
+       (XFS_INOPROC_IS_PROC((ino_rec), (ino_offset)) != 0LL)
+#endif /* XR_INO_REF_DEBUG */
+
+/*
+ * set/get inode number of parent -- works for directory inodes only
+ */
+void           set_inode_parent(ino_tree_node_t *irec, int ino_offset,
+                                       xfs_ino_t ino);
+#if 0
+void           clear_inode_parent(ino_tree_node_t *irec, int offset);
+#endif
+xfs_ino_t      get_inode_parent(ino_tree_node_t *irec, int ino_offset);
+
+/*
+ * bmap cursor for tracking and fixing bmap btrees.  All xfs btrees number
+ * the levels with 0 being the leaf and every level up being 1 greater.
+ *
+ * a cursor holds one bm_level_state_t per possible level; only
+ * XR_MAX_BMLEVELS levels can be represented.
+ */
+
+#define XR_MAX_BMLEVELS                10      /* XXX - rcc need to verify number */
+
+/*
+ * per-level state.  init_bm_cursor() resets the three block numbers
+ * to NULLDFSBNO and the two keys to NULLDFILOFF.
+ * NOTE(review): presumably fsbno is the block currently being looked
+ * at on this level and left_fsbno/right_fsbno are its siblings --
+ * confirm against the users of the cursor.
+ */
+typedef struct bm_level_state  {
+       xfs_dfsbno_t            fsbno;
+       xfs_dfsbno_t            left_fsbno;
+       xfs_dfsbno_t            right_fsbno;
+       __uint64_t              first_key;
+       __uint64_t              last_key;
+/*
+       int                     level;
+       __uint64_t              prev_last_key;
+       xfs_buf_t               *bp;
+       xfs_bmbt_block_t        *block;
+*/
+} bm_level_state_t;
+
+/*
+ * the cursor itself: level count, owning inode number, a pointer to
+ * the on-disk inode and the per-level state array.
+ */
+typedef struct bm_cursor  {
+       int                     num_levels;
+       xfs_ino_t               ino;
+       xfs_dinode_t            *dip;
+       bm_level_state_t        level[XR_MAX_BMLEVELS];
+} bmap_cursor_t;
+
+void init_bm_cursor(bmap_cursor_t *cursor, int num_level);
diff --git a/repair/incore_bmc.c b/repair/incore_bmc.c
new file mode 100644 (file)
index 0000000..89111fe
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * reset a bmap cursor:  zero the whole structure, record the number
+ * of btree levels, null out the inode number and set the block
+ * numbers and keys of all XR_MAX_BMLEVELS levels to their null values.
+ */
+void
+init_bm_cursor(bmap_cursor_t *cursor, int num_levels)
+{
+       bm_level_state_t *lvl;
+
+       bzero(cursor, sizeof(bmap_cursor_t));
+       cursor->num_levels = num_levels;
+       cursor->ino = NULLFSINO;
+
+       for (lvl = &cursor->level[0];
+                       lvl < &cursor->level[XR_MAX_BMLEVELS]; lvl++)  {
+               lvl->fsbno = NULLDFSBNO;
+               lvl->left_fsbno = NULLDFSBNO;
+               lvl->right_fsbno = NULLDFSBNO;
+               lvl->first_key = NULLDFILOFF;
+               lvl->last_key = NULLDFILOFF;
+       }
+}
diff --git a/repair/incore_ext.c b/repair/incore_ext.c
new file mode 100644 (file)
index 0000000..5c3708b
--- /dev/null
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "avl64.h"
+#define ALLOC_NUM_EXTS         100
+
+/*
+ * paranoia -- account for any weird padding, 64/32-bit alignment, etc.
+ *
+ * extent nodes are malloc'ed ALLOC_NUM_EXTS at a time.  each chunk
+ * starts with a ba_rec_t that is handed to record_allocation() so the
+ * chunk can be found again for freeing, followed by the array of nodes.
+ */
+typedef struct extent_alloc_rec  {
+       ba_rec_t                alloc_rec;
+       extent_tree_node_t      extents[ALLOC_NUM_EXTS];
+} extent_alloc_rec_t;
+
+/* same layout for realtime extent nodes */
+typedef struct rt_extent_alloc_rec  {
+       ba_rec_t                alloc_rec;
+       rt_extent_tree_node_t   extents[ALLOC_NUM_EXTS];
+} rt_extent_alloc_rec_t;
+
+/*
+ * note:  there are 4 sets of incore things handled here:
+ * block bitmaps, extent trees, uncertain inode list,
+ * and inode tree.  The tree-based code uses the AVL
+ * tree package used by the IRIX kernel VM code
+ * (sys/avl.h).  The inode list code uses the same records
+ * as the inode tree code for convenience.  The bitmaps
+ * and bitmap operators are mostly macros defined in incore.h.
+ * There is one of everything per AG except for extent
+ * trees:  there's one duplicate extent tree, one bno and
+ * one bcnt extent tree per AG.  Not all of the above exist
+ * through all phases.  The duplicate extent tree gets trashed
+ * at the end of phase 4.  The bno/bcnt trees don't appear until
+ * phase 5.  The uncertain inode list goes away at the end of
+ * phase 3.  The inode tree and bno/bcnt trees go away after phase 5.
+ */
+/*
+ * free list of unused extent nodes.  nodes are chained through
+ * avl_node.avl_nextino; cnt is the number of nodes on the list.
+ */
+typedef struct ext_flist_s  {
+       extent_tree_node_t      *list;
+       int                     cnt;
+} ext_flist_t;
+
+static ext_flist_t ext_flist;
+
+/* same thing for realtime extent nodes */
+typedef struct rt_ext_flist_s  {
+       rt_extent_tree_node_t   *list;
+       int                     cnt;
+} rt_ext_flist_t;
+
+static rt_ext_flist_t rt_ext_flist;
+
+static avl64tree_desc_t        *rt_ext_tree_ptr;       /* dup extent tree for rt */
+
+static avltree_desc_t  **extent_tree_ptrs;     /* array of extent tree ptrs */
+                                               /* one per ag for dups */
+static avltree_desc_t  **extent_bno_ptrs;      /*
+                                                * array of extent tree ptrs
+                                                * one per ag for free extents
+                                                * sorted by starting block
+                                                * number
+                                                */
+static avltree_desc_t  **extent_bcnt_ptrs;     /*
+                                                * array of extent tree ptrs
+                                                * one per ag for free extents
+                                                * sorted by size
+                                                */
+
+/*
+ * list of allocated "blocks" for easy freeing later
+ * (ba_list for the per-AG extent node chunks, rt_ba_list for the
+ * realtime extent node chunks)
+ */
+static ba_rec_t                *ba_list;
+static ba_rec_t                *rt_ba_list;
+
+/*
+ * extent tree stuff is avl trees of duplicate extents,
+ * sorted in order by block number.  there is one tree per ag.
+ */
+
+/*
+ * hand out one extent node initialized with the given start block,
+ * length and state.  nodes come off the free list; when the list is
+ * empty a new chunk of ALLOC_NUM_EXTS nodes is malloc'ed, recorded on
+ * ba_list and threaded onto the free list first.
+ */
+static extent_tree_node_t *
+mk_extent_tree_nodes(xfs_agblock_t new_startblock,
+       xfs_extlen_t new_blockcount, extent_state_t new_state)
+{
+       extent_alloc_rec_t      *chunk;
+       extent_tree_node_t      *node;
+       int                     slot;
+
+       if (ext_flist.cnt == 0)  {
+               ASSERT(ext_flist.list == NULL);
+
+               chunk = malloc(sizeof(extent_alloc_rec_t));
+               if (chunk == NULL)
+                       do_error("couldn't allocate new extent descriptors.\n");
+
+               record_allocation(&chunk->alloc_rec, ba_list);
+
+               /* push every node of the new chunk onto the free list */
+               for (slot = 0; slot < ALLOC_NUM_EXTS; slot++)  {
+                       node = &chunk->extents[slot];
+                       node->avl_node.avl_nextino = (avlnode_t *)
+                                                       ext_flist.list;
+                       ext_flist.list = node;
+                       ext_flist.cnt++;
+               }
+       }
+
+       ASSERT(ext_flist.list != NULL);
+
+       /* pop the head of the free list */
+       node = ext_flist.list;
+       ext_flist.list = (extent_tree_node_t *) node->avl_node.avl_nextino;
+       ext_flist.cnt--;
+       node->avl_node.avl_nextino = NULL;
+
+       /* fill in the caller's values */
+       node->ex_startblock = new_startblock;
+       node->ex_blockcount = new_blockcount;
+       node->ex_state = new_state;
+       node->next = NULL;
+
+       return(node);
+}
+
+/*
+ * put a single extent node back on the front of the free list.
+ */
+void
+release_extent_tree_node(extent_tree_node_t *node)
+{
+       avlnode_t *old_head = (avlnode_t *) ext_flist.list;
+
+       node->avl_node.avl_nextino = old_head;
+       ext_flist.cnt++;
+       ext_flist.list = node;
+}
+
+/*
+ * recycle every node of a tree:  walk the tree in order, put each
+ * node (and any nodes chained off its "next" pointer) back on the
+ * free list for reuse, then mark the tree empty.  the duplicate and
+ * bno/bcnt extent trees for each AG are recycled this way once they
+ * are no longer needed, to save memory.
+ */
+void
+release_extent_tree(avltree_desc_t *tree)
+{
+       extent_tree_node_t      *node;
+       extent_tree_node_t      *next_node;
+       extent_tree_node_t      *chained;
+       extent_tree_node_t      *next_chained;
+
+       if (tree->avl_firstino == NULL)
+               return;
+
+       for (node = (extent_tree_node_t *) tree->avl_firstino;
+                       node != NULL; node = next_node)  {
+               /* save the in-order link, releasing the node reuses it */
+               next_node = (extent_tree_node_t *) node->avl_node.avl_nextino;
+
+               /*
+                * node->next is guaranteed to be set only in bcnt trees
+                */
+               for (chained = node->next; chained != NULL;
+                               chained = next_chained)  {
+                       next_chained = chained->next;
+                       release_extent_tree_node(chained);
+               }
+
+               release_extent_tree_node(node);
+       }
+
+       tree->avl_firstino = NULL;
+       tree->avl_root = NULL;
+}
+
+/*
+ * top-level (visible) routines
+ */
+
+/* recycle all nodes of the duplicate extent tree of AG agno */
+void
+release_dup_extent_tree(xfs_agnumber_t agno)
+{
+       avltree_desc_t *dup_tree = extent_tree_ptrs[agno];
+
+       release_extent_tree(dup_tree);
+}
+
+/* recycle all nodes of the by-block-number free extent tree of AG agno */
+void
+release_agbno_extent_tree(xfs_agnumber_t agno)
+{
+       avltree_desc_t *bno_tree = extent_bno_ptrs[agno];
+
+       release_extent_tree(bno_tree);
+}
+
+/* recycle all nodes of the by-size free extent tree of AG agno */
+void
+release_agbcnt_extent_tree(xfs_agnumber_t agno)
+{
+       avltree_desc_t *bcnt_tree = extent_bcnt_ptrs[agno];
+
+       release_extent_tree(bcnt_tree);
+}
+
+/*
+ * free extents are kept in 2 trees per AG, one sorted by block number
+ * and one sorted by extent size.  the next 4 routines manage the
+ * tree sorted by block number (the bno tree).
+ *
+ * add_bno_extent() inserts a new free extent into the bno tree of AG
+ * agno and reports an error if the insert is refused.
+ */
+void
+add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount)
+{
+       extent_tree_node_t *node;
+
+       ASSERT(extent_bno_ptrs != NULL);
+       ASSERT(extent_bno_ptrs[agno] != NULL);
+
+       node = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE);
+
+       if (avl_insert(extent_bno_ptrs[agno], (avlnode_t *) node) != NULL)
+               return;
+
+       do_error("xfs_repair:  duplicate bno extent range\n");
+}
+
+/*
+ * return the first in-order node of the bno tree of AG agno
+ * (whatever the tree's avl_firstino currently holds).
+ */
+extent_tree_node_t *
+findfirst_bno_extent(xfs_agnumber_t agno)
+{
+       avltree_desc_t *bno_tree;
+
+       ASSERT(extent_bno_ptrs != NULL);
+       ASSERT(extent_bno_ptrs[agno] != NULL);
+
+       bno_tree = extent_bno_ptrs[agno];
+
+       return((extent_tree_node_t *) bno_tree->avl_firstino);
+}
+
+/*
+ * look up startblock in the bno tree of AG agno with avl_find() and
+ * return whatever it finds.
+ */
+extent_tree_node_t *
+find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock)
+{
+       avlnode_t *found;
+
+       ASSERT(extent_bno_ptrs != NULL);
+       ASSERT(extent_bno_ptrs[agno] != NULL);
+
+       found = avl_find(extent_bno_ptrs[agno], startblock);
+
+       return((extent_tree_node_t *) found);
+}
+
+/*
+ * remove a node from the bno tree of AG agno.  ext must be a node
+ * that is in the tree (a pointer obtained from a find routine).
+ */
+void
+get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext)
+{
+       avltree_desc_t *bno_tree;
+
+       ASSERT(extent_bno_ptrs != NULL);
+       ASSERT(extent_bno_ptrs[agno] != NULL);
+
+       bno_tree = extent_bno_ptrs[agno];
+       avl_delete(bno_tree, &ext->avl_node);
+}
+
+/*
+ * normalizing constant for bcnt size -> address conversion (see avl ops)
+ * used by the AVL tree code to convert sizes and must be used when
+ * doing an AVL search in the tree (e.g. avl_findrange(s))
+ *
+ * NOTE(review): the only uses of BCNT_ADDR in this file are inside
+ * commented-out code in avl_ext_bcnt_start()/avl_ext_bcnt_end(); the
+ * active bcnt code keys the tree on the raw block count and searches
+ * with the raw block count, so the paragraph above looks stale.
+ */
+#define MAXBCNT                0xFFFFFFFF
+#define BCNT_ADDR(cnt) ((unsigned int) MAXBCNT - (cnt))
+
+/*
+ * the next 4 routines manage the trees of free extents -- 2 trees
+ * per AG.  The first tree is sorted by block number.  The second
+ * tree is sorted by extent size.  This is the bcnt tree.
+ */
+void
+add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount)
+{
+       extent_tree_node_t *ext, *prev, *current, *top;
+       xfs_agblock_t           tmp_startblock;
+       xfs_extlen_t            tmp_blockcount;
+       extent_state_t          tmp_state;
+
+       ASSERT(extent_bcnt_ptrs != NULL);
+       ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+       ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE);
+
+       ASSERT(ext->next == NULL);
+
+#ifdef XR_BCNT_TRACE
+       fprintf(stderr, "adding bcnt: agno = %d, start = %u, count = %u\n",
+                       agno, startblock, blockcount);
+#endif
+       if ((current = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno],
+                                                       blockcount)) != NULL)  {
+               /*
+                * avl tree code doesn't handle dups so insert
+                * onto linked list in increasing startblock order
+                */
+               top = prev = current;
+               while (current != NULL &&
+                               startblock > current->ex_startblock)  {
+                       prev = current;
+                       current = current->next;
+               }
+
+               if (top == current)  {
+                       ASSERT(top == prev);
+                       /*
+                        * swap the values of to-be-inserted element
+                        * and the values of the head of the list.
+                        * then insert as the 2nd element on the list.
+                        *
+                        * see the comment in get_bcnt_extent()
+                        * as to why we have to do this.
+                        */
+                       tmp_startblock = top->ex_startblock;
+                       tmp_blockcount = top->ex_blockcount;
+                       tmp_state = top->ex_state;
+
+                       top->ex_startblock = ext->ex_startblock;
+                       top->ex_blockcount = ext->ex_blockcount;
+                       top->ex_state = ext->ex_state;
+
+                       ext->ex_startblock = tmp_startblock;
+                       ext->ex_blockcount = tmp_blockcount;
+                       ext->ex_state = tmp_state;
+
+                       current = top->next;
+                       prev = top;
+               }
+
+               prev->next = ext;
+               ext->next = current;
+
+               return;
+       }
+
+       if (avl_insert(extent_bcnt_ptrs[agno], (avlnode_t *) ext) == NULL)  {
+               do_error("xfs_repair:  duplicate bno extent range\n");
+       }
+
+       return;
+}
+
+/*
+ * return the first in-order node of the bcnt tree of AG agno
+ * (whatever the tree's avl_firstino currently holds).
+ */
+extent_tree_node_t *
+findfirst_bcnt_extent(xfs_agnumber_t agno)
+{
+       avltree_desc_t *bcnt_tree;
+
+       ASSERT(extent_bcnt_ptrs != NULL);
+       ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+       bcnt_tree = extent_bcnt_ptrs[agno];
+
+       return((extent_tree_node_t *) bcnt_tree->avl_firstino);
+}
+
+/*
+ * return the node avl_lastino() reports for the root of the bcnt
+ * tree of AG agno.
+ * NOTE(review): presumably that is the last in-order node and hence
+ * the largest block count -- confirm against avl_lastino().
+ */
+extent_tree_node_t *
+findbiggest_bcnt_extent(xfs_agnumber_t agno)
+{
+       extern avlnode_t *avl_lastino(avlnode_t *root);
+       avlnode_t *last;
+
+       ASSERT(extent_bcnt_ptrs != NULL);
+       ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+       last = avl_lastino(extent_bcnt_ptrs[agno]->avl_root);
+
+       return((extent_tree_node_t *) last);
+}
+
+/*
+ * return the extent that follows ext in the bcnt tree of AG agno:
+ * the next extent of the same size if ext has one chained behind it,
+ * otherwise the in-order successor of the tree node for that size.
+ */
+extent_tree_node_t *
+findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext)
+{
+       avlnode_t *head;
+
+       if (ext->next == NULL)  {
+               /*
+                * end of the same-size list.  the avl_nextino pointer
+                * is maintained and altered by the AVL code on the
+                * node at the top of the list only, so look that node
+                * up to get the correct successor.
+                */
+               head = avl_find(extent_bcnt_ptrs[agno], ext->ex_blockcount);
+               ASSERT(head != NULL);
+               if (head->avl_nextino != NULL)  {
+                       ASSERT(ext->ex_blockcount < ((extent_tree_node_t *)
+                                       head->avl_nextino)->ex_blockcount);
+               }
+               return((extent_tree_node_t *) head->avl_nextino);
+       }
+
+       ASSERT(ext->ex_blockcount == ext->next->ex_blockcount);
+       ASSERT(ext->ex_startblock < ext->next->ex_startblock);
+
+       return(ext->next);
+}
+
+/*
+ * this is meant to be called after you walk the bno tree to
+ * determine exactly which extent you want (so you'll know the
+ * desired value for startblock when you call this routine).
+ *
+ * removes the extent (startblock, blockcount) from the bcnt tree of
+ * AG agno and returns its node, or NULL if the tree has no node of
+ * size blockcount.  the returned node is not put back on the free
+ * list here.
+ *
+ * NOTE(review): when the size exists but no extent on its list has
+ * the requested startblock, only the ASSERTs catch it; without them
+ * the list case dereferences a NULL ext and the single-node case
+ * deletes and returns a node with a different startblock.
+ */
+extent_tree_node_t *
+get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount)
+{
+       extent_tree_node_t      *ext, *prev, *top;
+       xfs_agblock_t           tmp_startblock;
+       xfs_extlen_t            tmp_blockcount;
+       extent_state_t          tmp_state;
+
+       prev = NULL;
+       ASSERT(extent_bcnt_ptrs != NULL);
+       ASSERT(extent_bcnt_ptrs[agno] != NULL);
+
+       if ((ext = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno],
+                                                       blockcount)) == NULL)
+               return(NULL);
+       
+       /* top is the node that is actually linked into the avl tree */
+       top = ext;
+
+       if (ext->next != NULL)  {
+               /*
+                * pull it off the list
+                */
+               while (ext != NULL && startblock != ext->ex_startblock)  {
+                       prev = ext;
+                       ext = ext->next;
+               }
+               ASSERT(ext != NULL);
+               if (ext == top)  {
+                       /*
+                        * this node is linked into the tree so we
+                        * swap the core values so we can delete
+                        * the next item on the list instead of
+                        * the head of the list.  This is because
+                        * the rest of the tree undoubtedly has
+                        * pointers to the piece of memory that
+                        * is the head of the list so pulling
+                        * the item out of the list and hence
+                        * the avl tree would be a bad idea.
+                        * 
+                        * (cheaper than the alternative, a tree
+                        * delete of this node followed by a tree
+                        * insert of the next node on the list).
+                        */
+                       tmp_startblock = ext->next->ex_startblock;
+                       tmp_blockcount = ext->next->ex_blockcount;
+                       tmp_state = ext->next->ex_state;
+
+                       ext->next->ex_startblock = ext->ex_startblock;
+                       ext->next->ex_blockcount = ext->ex_blockcount;
+                       ext->next->ex_state = ext->ex_state;
+
+                       ext->ex_startblock = tmp_startblock;
+                       ext->ex_blockcount = tmp_blockcount;
+                       ext->ex_state = tmp_state;
+
+                       /*
+                        * the 2nd node now carries the requested
+                        * values, so that is the one to unlink
+                        */
+                       ext = ext->next;
+                       prev = top;
+               }
+               /*
+                * now, a simple list deletion
+                */
+               prev->next = ext->next;
+               ext->next = NULL;
+       } else  {
+               /*
+                * no list, just one node.  simply delete
+                */
+               avl_delete(extent_bcnt_ptrs[agno], &ext->avl_node);
+       }
+
+       ASSERT(ext->ex_startblock == startblock);
+       ASSERT(ext->ex_blockcount == blockcount);
+       return(ext);
+}
+
+/*
+ * the next 2 routines manage the trees of duplicate extents -- 1 tree
+ * per AG
+ *
+ * add_dup_extent() records blocks [startblock, startblock + blockcount)
+ * of AG agno as duplicates.  Extents already in the tree that overlap
+ * or touch the new range are removed and replaced by one extent that
+ * covers the union of all of them.  If an existing extent already
+ * covers the whole new range nothing is changed.
+ *
+ * NOTE(review): nodes removed from the tree while merging are not put
+ * back on the free list here; their memory stays allocated but unused.
+ */
+void
+add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount)
+{
+       extent_tree_node_t *first, *last, *ext, *next_ext;
+       xfs_agblock_t new_startblock;
+       xfs_agblock_t new_endblock;
+       xfs_agblock_t ext_endblock;
+
+       ASSERT(agno < glob_agcount);
+
+#ifdef XR_DUP_TRACE
+       fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock, blockcount);
+#endif
+       /*
+        * search one block beyond each end of the new range so that
+        * adjacent extents are found as well.  the lower bound must
+        * not wrap around when the new range starts at block 0.
+        */
+       avl_findranges(extent_tree_ptrs[agno],
+               startblock > 0 ? startblock - 1 : 0,
+               startblock + blockcount + 1,
+               (avlnode_t **) &first, (avlnode_t **) &last);
+       /*
+        * find adjacent and overlapping extent blocks
+        */
+       if (first == NULL && last == NULL)  {
+               /* nothing, just make and insert new extent */
+
+               ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_MULT);
+
+               if (avl_insert(extent_tree_ptrs[agno],
+                               (avlnode_t *) ext) == NULL)  {
+                       do_error("xfs_repair:  duplicate extent range\n");
+               }
+
+               return;
+       }
+
+       ASSERT(first != NULL && last != NULL);
+
+       /*
+        * find the new composite range, delete old extent nodes
+        * as we go.  the range is tracked as start and end block so
+        * that moving the start down never shortens the far end.
+        */
+       new_startblock = startblock;
+       new_endblock = startblock + blockcount;
+
+       for (ext = first;
+               ext != (extent_tree_node_t *) last->avl_node.avl_nextino;
+               ext = next_ext)  {
+               /*
+                * preserve the next inorder node
+                */
+               next_ext = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+               ext_endblock = ext->ex_startblock + ext->ex_blockcount;
+               /*
+                * just bail if the new extent is contained within an
+                * old one -- the old extent has to start at or before
+                * the new one and end at or after it.
+                */
+               if (ext->ex_startblock <= startblock &&
+                               ext_endblock >= startblock + blockcount)
+                       return;
+               /*
+                * every extent between first and last is merged.  the
+                * overlap test that used to guard this joined its two
+                * halves with || and so was true for every extent.
+                */
+               if (ext->ex_startblock < new_startblock)
+                       new_startblock = ext->ex_startblock;
+
+               if (ext_endblock > new_endblock)
+                       new_endblock = ext_endblock;
+
+               avl_delete(extent_tree_ptrs[agno], (avlnode_t *) ext);
+       }
+
+       ext = mk_extent_tree_nodes(new_startblock,
+                       new_endblock - new_startblock, XR_E_MULT);
+
+       if (avl_insert(extent_tree_ptrs[agno], (avlnode_t *) ext) == NULL)  {
+               do_error("xfs_repair:  duplicate extent range\n");
+       }
+
+       return;
+}
+
+/*
+ * is block agbno of AG agno inside a recorded duplicate extent?
+ * returns 1 if avl_findrange() finds a node for it, 0 if not.
+ * mp is not used.
+ */
+/* ARGSUSED */
+int
+search_dup_extent(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
+{
+       int is_dup;
+
+       ASSERT(agno < glob_agcount);
+
+       is_dup = (avl_findrange(extent_tree_ptrs[agno], agbno) != NULL) ? 1 : 0;
+
+       return(is_dup);
+}
+
+/*
+ * avl ops for trees keyed by block number:  a node covers the range
+ * that starts at ex_startblock ...
+ */
+static __psunsigned_t
+avl_ext_start(avlnode_t *node)
+{
+       extent_tree_node_t *ext = (extent_tree_node_t *) node;
+
+       return((__psunsigned_t) ext->ex_startblock);
+}
+
+/*
+ * ... and ends at ex_startblock + ex_blockcount
+ */
+static __psunsigned_t
+avl_ext_end(avlnode_t *node)
+{
+       extent_tree_node_t *ext = (extent_tree_node_t *) node;
+
+       return((__psunsigned_t) (ext->ex_startblock + ext->ex_blockcount));
+}
+
+/*
+ * avl ops for the bcnt trees.  start and end are both the node's
+ * ex_blockcount, so a node occupies the single key value equal to
+ * its size.
+ *
+ * NOTE(review): the commented-out bodies converted the size with
+ * BCNT_ADDR so that the bigger the size, the lower the address and
+ * the biggest extent would be first in the tree.  that conversion is
+ * disabled; the active code returns the raw block count.
+ */
+static __psunsigned_t
+avl_ext_bcnt_start(avlnode_t *node)
+{
+/*
+       return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *)
+                                               node)->ex_blockcount)));
+*/
+       return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount);
+}
+
+static __psunsigned_t
+avl_ext_bcnt_end(avlnode_t *node)
+{
+/*
+       return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *)
+                                               node)->ex_blockcount)));
+*/
+       return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount);
+}
+
+/*
+ * start/end callbacks for trees keyed by extent size (block count)
+ */
+avlops_t avl_extent_bcnt_tree_ops = {
+       avl_ext_bcnt_start,
+       avl_ext_bcnt_end
+};
+
+/*
+ * start/end callbacks for trees keyed by starting block number
+ */
+avlops_t avl_extent_tree_ops = {
+       avl_ext_start,
+       avl_ext_end
+};
+
+/*
+ * for real-time extents -- have to dup code since realtime extent
+ * startblocks can be 64-bit values.
+ *
+ * hand out one realtime extent node initialized with the given start
+ * block, length and state.  nodes come off the realtime free list;
+ * when the list is empty a new chunk of ALLOC_NUM_EXTS nodes is
+ * malloc'ed, recorded on rt_ba_list and threaded onto the list first.
+ */
+static rt_extent_tree_node_t *
+mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock,
+       xfs_extlen_t new_blockcount, extent_state_t new_state)
+{
+       rt_extent_alloc_rec_t   *chunk;
+       rt_extent_tree_node_t   *node;
+       int                     slot;
+
+       if (rt_ext_flist.cnt == 0)  {
+               ASSERT(rt_ext_flist.list == NULL);
+
+               chunk = malloc(sizeof(rt_extent_alloc_rec_t));
+               if (chunk == NULL)
+                       do_error("couldn't allocate new extent descriptors.\n");
+
+               record_allocation(&chunk->alloc_rec, rt_ba_list);
+
+               /* push every node of the new chunk onto the free list */
+               for (slot = 0; slot < ALLOC_NUM_EXTS; slot++)  {
+                       node = &chunk->extents[slot];
+                       node->avl_node.avl_nextino = (avlnode_t *)
+                                                       rt_ext_flist.list;
+                       rt_ext_flist.list = node;
+                       rt_ext_flist.cnt++;
+               }
+       }
+
+       ASSERT(rt_ext_flist.list != NULL);
+
+       /* pop the head of the free list */
+       node = rt_ext_flist.list;
+       rt_ext_flist.list = (rt_extent_tree_node_t *) node->avl_node.avl_nextino;
+       rt_ext_flist.cnt--;
+       node->avl_node.avl_nextino = NULL;
+
+       /* fill in the caller's values */
+       node->rt_startblock = new_startblock;
+       node->rt_blockcount = new_blockcount;
+       node->rt_state = new_state;
+
+       return(node);
+}
+
+#if 0
+/*
+ * Disabled code: helpers that would hand real-time extent nodes back
+ * to rt_ext_flist.  Nothing is compiled from this section.
+ *
+ * NOTE(review): as written this would need work before being enabled.
+ * release_rt_extent_tree() reads rt_extent_tree_ptr, whereas the live
+ * code in this file uses rt_ext_tree_ptr, and it walks the tree with
+ * extent_tree_node_t pointers while release_rt_extent_tree_node()
+ * takes an rt_extent_tree_node_t.  lext and ltmp are unused.
+ */
+void
+release_rt_extent_tree_node(rt_extent_tree_node_t *node)
+{
+       /* push the node onto the head of the rt free list */
+       node->avl_node.avl_nextino = (avlnode_t *) rt_ext_flist.list;
+       rt_ext_flist.list = node;
+       rt_ext_flist.cnt++;
+
+       return;
+}
+
+void
+release_rt_extent_tree()
+{
+       extent_tree_node_t      *ext;
+       extent_tree_node_t      *tmp;
+       extent_tree_node_t      *lext;
+       extent_tree_node_t      *ltmp;
+       avl64tree_desc_t        *tree;
+
+       tree = rt_extent_tree_ptr;
+
+       if (tree->avl_firstino == NULL)
+               return;
+
+       ext = (extent_tree_node_t *) tree->avl_firstino;
+
+       /* walk the in-order chain, saving the successor before release */
+       while (ext != NULL)  {
+               tmp = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+               release_rt_extent_tree_node(ext);
+               ext = tmp;
+       }
+
+       tree->avl_root = tree->avl_firstino = NULL;
+
+       return;
+}
+#endif
+
+/*
+ * Tear down the real-time duplicate extent tree.  There is a single
+ * real-time tree rather than one per AG, so no per-node release pass
+ * is needed: the node batches recorded on rt_ba_list and the tree
+ * descriptor are freed wholesale and both globals are cleared.
+ */
+/* ARGSUSED */
+void
+free_rt_dup_extent_tree(xfs_mount_t *mp)
+{
+       ASSERT(mp->m_sb.sb_rblocks != 0);
+
+       /* node storage */
+       free_allocations(rt_ba_list);
+
+       /* tree descriptor */
+       free(rt_ext_tree_ptr);
+
+       rt_ext_tree_ptr = NULL;
+       rt_ba_list = NULL;
+}
+
+/*
+ * add a duplicate real-time extent
+ *
+ * Records [startblock, startblock + blockcount) in the real-time
+ * duplicate extent tree.  Existing extents that overlap or directly
+ * abut the new range are removed and replaced by one node covering the
+ * union, so the tree never holds overlapping or adjacent extents.  If
+ * an existing extent already covers the whole new range the tree is
+ * left untouched.
+ *
+ * NOTE(review): startblock - 1 wraps when startblock is 0 if
+ * xfs_drtbno_t is unsigned -- confirm how avl64_findranges() treats
+ * that search range.
+ * NOTE(review): nodes removed from the tree here are not returned to
+ * rt_ext_flist; their storage is only reclaimed when
+ * free_rt_dup_extent_tree() frees the whole batch list.
+ */
+void
+add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
+{
+       rt_extent_tree_node_t *first, *last, *ext, *next_ext, *stop;
+       xfs_drtbno_t endblock = startblock + blockcount;
+       xfs_drtbno_t ext_end;
+       xfs_drtbno_t new_startblock;
+       xfs_drtbno_t new_endblock;
+
+       /*
+        * find adjacent and overlapping extent blocks
+        */
+       avl64_findranges(rt_ext_tree_ptr, startblock - 1,
+               endblock + 1,
+               (avl64node_t **) &first, (avl64node_t **) &last);
+
+       if (first == NULL && last == NULL)  {
+               /* nothing, just make and insert new extent */
+
+               ext = mk_rt_extent_tree_nodes(startblock,
+                               blockcount, XR_E_MULT);
+
+               if (avl64_insert(rt_ext_tree_ptr,
+                               (avl64node_t *) ext) == NULL)  {
+                       do_error("xfs_repair:  duplicate extent range\n");
+               }
+
+               return;
+       }
+
+       ASSERT(first != NULL && last != NULL);
+
+       /*
+        * remember where the candidate run stops before anything is
+        * deleted, so the loop bound never depends on a node that has
+        * already been removed from the tree
+        */
+       stop = (rt_extent_tree_node_t *) last->avl_node.avl_nextino;
+
+       /*
+        * just bail if the new extent is contained within an old one.
+        * this is done as a separate pass, comparing end blocks rather
+        * than block counts, so that nothing has been deleted yet if
+        * we return here.
+        */
+       for (ext = first; ext != stop;
+               ext = (rt_extent_tree_node_t *) ext->avl_node.avl_nextino)  {
+               ext_end = ext->rt_startblock + ext->rt_blockcount;
+
+               if (ext->rt_startblock <= startblock && ext_end >= endblock)
+                       return;
+       }
+
+       /*
+        * find the new composite range, delete old extent nodes
+        * as we go.  both ends of the union are tracked explicitly so
+        * that lowering the start never shortens the range.
+        */
+       new_startblock = startblock;
+       new_endblock = endblock;
+
+       for (ext = first; ext != stop; ext = next_ext)  {
+               /*
+                * preserve the next inorder node
+                */
+               next_ext = (rt_extent_tree_node_t *) ext->avl_node.avl_nextino;
+               ext_end = ext->rt_startblock + ext->rt_blockcount;
+
+               /*
+                * leave alone anything that neither overlaps nor abuts
+                * the new extent
+                */
+               if (ext_end < startblock || ext->rt_startblock > endblock)
+                       continue;
+
+               if (ext->rt_startblock < new_startblock)
+                       new_startblock = ext->rt_startblock;
+
+               if (ext_end > new_endblock)
+                       new_endblock = ext_end;
+
+               avl64_delete(rt_ext_tree_ptr, (avl64node_t *) ext);
+       }
+
+       ext = mk_rt_extent_tree_nodes(new_startblock,
+                       (xfs_extlen_t) (new_endblock - new_startblock),
+                       XR_E_MULT);
+
+       if (avl64_insert(rt_ext_tree_ptr, (avl64node_t *) ext) == NULL)  {
+               do_error("xfs_repair:  duplicate extent range\n");
+       }
+
+       return;
+}
+
+/*
+ * look bno up in the real-time duplicate extent tree:
+ * returns 1 if some extent in the tree covers it, 0 otherwise
+ */
+/* ARGSUSED */
+int
+search_rt_dup_extent(xfs_mount_t *mp, xfs_drtbno_t bno)
+{
+       int is_dup = 0;
+
+       if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL)
+               is_dup = 1;
+
+       return(is_dup);
+}
+
+/*
+ * range-start key callback for the real-time extent tree:
+ * the node's first block
+ */
+static __uint64_t
+avl64_rt_ext_start(avl64node_t *node)
+{
+       rt_extent_tree_node_t *rt_ext = (rt_extent_tree_node_t *) node;
+
+       return(rt_ext->rt_startblock);
+}
+
+/*
+ * range-end key callback for the real-time extent tree:
+ * start block plus block count, i.e. one past the last block
+ */
+static __uint64_t
+avl64_ext_end(avl64node_t *node)
+{
+       rt_extent_tree_node_t *rt_ext = (rt_extent_tree_node_t *) node;
+
+       return(rt_ext->rt_startblock + rt_ext->rt_blockcount);
+}
+
+/*
+ * key callbacks for the single real-time extent tree;
+ * installed on rt_ext_tree_ptr by incore_ext_init()
+ */
+avl64ops_t avl64_extent_tree_ops = {
+       avl64_rt_ext_start,
+       avl64_ext_end
+};
+
+/*
+ * Allocate and initialize all in-core extent tracking state:
+ * three AVL trees per AG (extent_tree_ptrs[], extent_bno_ptrs[] and
+ * extent_bcnt_ptrs[]), the single real-time extent tree, and the
+ * node free lists.  Any allocation failure is reported via do_error().
+ *
+ * Every allocation is sized from the pointer it is assigned to, so
+ * the size always matches the object type.  In particular the
+ * real-time descriptor is set up with avl64_init_tree() and must be
+ * sized as whatever rt_ext_tree_ptr points at, not as avltree_desc_t.
+ */
+void
+incore_ext_init(xfs_mount_t *mp)
+{
+       xfs_agnumber_t i;
+       xfs_agnumber_t agcount = mp->m_sb.sb_agcount;
+
+       ba_list = NULL;
+       rt_ba_list = NULL;
+
+       if ((extent_tree_ptrs = malloc(agcount *
+                                       sizeof(*extent_tree_ptrs))) == NULL)
+               do_error("couldn't malloc dup extent tree descriptor table\n");
+
+       if ((extent_bno_ptrs = malloc(agcount *
+                                       sizeof(*extent_bno_ptrs))) == NULL)
+               do_error("couldn't malloc free by-bno extent tree descriptor table\n");
+
+       if ((extent_bcnt_ptrs = malloc(agcount *
+                                       sizeof(*extent_bcnt_ptrs))) == NULL)
+               do_error("couldn't malloc free by-bcnt extent tree descriptor table\n");
+
+       for (i = 0; i < agcount; i++)  {
+               if ((extent_tree_ptrs[i] =
+                               malloc(sizeof(*extent_tree_ptrs[i]))) == NULL)
+                       do_error("couldn't malloc dup extent tree descriptor\n");
+               if ((extent_bno_ptrs[i] =
+                               malloc(sizeof(*extent_bno_ptrs[i]))) == NULL)
+                       do_error("couldn't malloc bno extent tree descriptor\n");
+               if ((extent_bcnt_ptrs[i] =
+                               malloc(sizeof(*extent_bcnt_ptrs[i]))) == NULL)
+                       do_error("couldn't malloc bcnt extent tree descriptor\n");
+       }
+
+       for (i = 0; i < agcount; i++)  {
+               avl_init_tree(extent_tree_ptrs[i], &avl_extent_tree_ops);
+               avl_init_tree(extent_bno_ptrs[i], &avl_extent_tree_ops);
+               avl_init_tree(extent_bcnt_ptrs[i], &avl_extent_bcnt_tree_ops);
+       }
+
+       if ((rt_ext_tree_ptr = malloc(sizeof(*rt_ext_tree_ptr))) == NULL)
+               do_error("couldn't malloc dup rt extent tree descriptor\n");
+
+       avl64_init_tree(rt_ext_tree_ptr, &avl64_extent_tree_ops);
+
+       /* both node free lists start out empty */
+       ext_flist.cnt = 0;
+       ext_flist.list = NULL;
+
+       rt_ext_flist.cnt = 0;
+       rt_ext_flist.list = NULL;
+
+       return;
+}
+
+/*
+ * Release everything used to track the per-AG extent trees: the node
+ * batches recorded on ba_list, each AG's three tree descriptors, and
+ * finally the three descriptor tables themselves, whose globals are
+ * reset to NULL.
+ */
+void
+incore_ext_teardown(xfs_mount_t *mp)
+{
+       xfs_agnumber_t agno;
+
+       free_allocations(ba_list);
+
+       agno = 0;
+       while (agno < mp->m_sb.sb_agcount)  {
+               free(extent_tree_ptrs[agno]);
+               free(extent_bno_ptrs[agno]);
+               free(extent_bcnt_ptrs[agno]);
+               agno++;
+       }
+
+       free(extent_bcnt_ptrs);
+       free(extent_bno_ptrs);
+       free(extent_tree_ptrs);
+
+       extent_tree_ptrs = NULL;
+       extent_bno_ptrs = NULL;
+       extent_bcnt_ptrs = NULL;
+}
+
+/*
+ * Count the nodes in one of an AG's free extent trees, starting at
+ * the tree's first in-order node.  A non-zero whichtree steps with
+ * findnext_bcnt_extent(), zero steps with findnext_bno_extent().
+ */
+int
+count_extents(xfs_agnumber_t agno, avltree_desc_t *tree, int whichtree)
+{
+       extent_tree_node_t *cur;
+       int total;
+
+       total = 0;
+
+       for (cur = (extent_tree_node_t *) tree->avl_firstino;
+                       cur != NULL; total++)  {
+               if (whichtree)
+                       cur = findnext_bcnt_extent(agno, cur);
+               else
+                       cur = findnext_bno_extent(cur);
+       }
+
+       return(total);
+}
+
+/*
+ * Walk the AG's extent_bno_ptrs[] tree, returning the number of
+ * extents and storing the sum of their block counts in *numblocks.
+ * The sum is accumulated in 64 bits and then assigned to the caller's
+ * uint, so it is narrowed on the way out.
+ */
+int
+count_bno_extents_blocks(xfs_agnumber_t agno, uint *numblocks)
+{
+       extent_tree_node_t *cur;
+       __uint64_t blocks_seen = 0;
+       int extents_seen = 0;
+
+       ASSERT(agno < glob_agcount);
+
+       for (cur = (extent_tree_node_t *) extent_bno_ptrs[agno]->avl_firstino;
+                       cur != NULL;
+                       cur = findnext_bno_extent(cur))  {
+               blocks_seen += cur->ex_blockcount;
+               extents_seen++;
+       }
+
+       *numblocks = blocks_seen;
+       return(extents_seen);
+}
+
+/*
+ * number of extents in the AG's extent_bno_ptrs[] tree
+ */
+int
+count_bno_extents(xfs_agnumber_t agno)
+{
+       int nextents;
+
+       ASSERT(agno < glob_agcount);
+
+       nextents = count_extents(agno, extent_bno_ptrs[agno], 0);
+
+       return(nextents);
+}
+
+/*
+ * number of extents in the AG's extent_bcnt_ptrs[] tree
+ */
+int
+count_bcnt_extents(xfs_agnumber_t agno)
+{
+       int nextents;
+
+       ASSERT(agno < glob_agcount);
+
+       nextents = count_extents(agno, extent_bcnt_ptrs[agno], 1);
+
+       return(nextents);
+}
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
new file mode 100644 (file)
index 0000000..bb14a37
--- /dev/null
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "avl.h"
+#include "globals.h"
+#include "incore.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+extern avlnode_t       *avl_firstino(avlnode_t *root);
+
+/*
+ * array of inode tree ptrs, one per ag
+ */
+static avltree_desc_t  **inode_tree_ptrs;
+
+/*
+ * ditto for uncertain inodes
+ */
+static avltree_desc_t  **inode_uncertain_tree_ptrs;
+
+#define ALLOC_NUM_INOS         100
+
+/*
+ * free list of inode tree nodes.  Nodes are chained through
+ * avl_node.avl_nextino; mk_ino_tree_nodes() pops from the head and
+ * free_ino_tree_node() pushes onto it.
+ */
+
+typedef struct ino_flist_s  {
+       ino_tree_node_t         *list;  /* head of the free chain */
+       ino_tree_node_t         *last;  /* not referenced by the list code here */
+       long long               cnt;    /* number of nodes on the chain */
+} ino_flist_t;
+
+static ino_flist_t ino_flist;  /* free list must be initialized before use */
+
+/*
+ * next is the uncertain inode list -- a sorted (in ascending order)
+ * list of inode records sorted on the starting inode number.  There
+ * is one list per ag.
+ */
+
+/*
+ * Hand out one inode record for use in the inode trees.
+ *
+ * Records come off ino_flist; when it is empty a batch of
+ * ALLOC_NUM_INOS records is malloc'ed and pushed onto it first.
+ * The starting_ino argument is not used here -- callers set
+ * ino_startnum themselves after the call.
+ *
+ * IMPORTANT:  every record is returned with all inodes marked free
+ *             and unconfirmed, no directory bits, and no parent or
+ *             backpointer data attached.
+ */
+/* ARGSUSED */
+static ino_tree_node_t *
+mk_ino_tree_nodes(xfs_agino_t starting_ino)
+{
+       ino_tree_node_t *batch;
+       ino_tree_node_t *rec;
+       int idx;
+
+       /* refill the free list if it has run dry */
+       if (ino_flist.cnt == 0)  {
+               ASSERT(ino_flist.list == NULL);
+
+               batch = malloc(sizeof(ino_tree_node_t[ALLOC_NUM_INOS]));
+               if (batch == NULL)
+                       do_error("inode map malloc failed\n");
+
+               for (idx = 0; idx < ALLOC_NUM_INOS; idx++)  {
+                       rec = &batch[idx];
+                       rec->avl_node.avl_nextino =
+                               (avlnode_t *) ino_flist.list;
+                       ino_flist.list = rec;
+                       ino_flist.cnt++;
+               }
+       }
+
+       ASSERT(ino_flist.list != NULL);
+
+       /* pop the head of the free list and detach it completely */
+       rec = ino_flist.list;
+       ino_flist.list = (ino_tree_node_t *) rec->avl_node.avl_nextino;
+       ino_flist.cnt--;
+
+       rec->avl_node.avl_nextino = NULL;
+       rec->avl_node.avl_forw = NULL;
+       rec->avl_node.avl_back = NULL;
+
+       /* every inode in a fresh record is free and unconfirmed */
+       rec->ino_startnum = 0;
+       rec->ino_confirmed = 0;
+       rec->ino_isa_dir = 0;
+       rec->ir_free = (xfs_inofree_t) - 1;
+       rec->ino_un.backptrs = NULL;
+
+       return(rec);
+}
+
+/*
+ * return inode record to free list, will be initialized when
+ * it gets pulled off list
+ *
+ * The record is pushed onto the head of ino_flist and any parent or
+ * backpointer data hanging off ino_un is freed.
+ *
+ * NOTE(review): both ino_un.backptrs and ino_un.plist are read here,
+ * and add_ino_backptrs() saves ino_un.plist before storing
+ * ino_un.backptrs, which suggests the two alias each other (a union)
+ * -- confirm against the type declaration.  If so, the second free()
+ * below releases the backptrs structure once full_backptrs is set.
+ * NOTE(review): the pentries array of the parent list is not freed
+ * on either path -- looks like a leak, verify.
+ */
+static void
+free_ino_tree_node(ino_tree_node_t *ino_rec)
+{
+       ino_rec->avl_node.avl_nextino = NULL;
+       ino_rec->avl_node.avl_forw = NULL;
+       ino_rec->avl_node.avl_back = NULL;
+
+       /* chain the record in front of whatever is already on the list */
+       if (ino_flist.list != NULL)  {
+               ASSERT(ino_flist.cnt > 0);
+               ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list;
+       } else  {
+               ASSERT(ino_flist.cnt == 0);
+               ino_rec->avl_node.avl_nextino = NULL;
+       }
+
+       ino_flist.list = ino_rec;
+       ino_flist.cnt++;
+
+       /* release the appendage; the pointer itself is reset on reuse */
+       if (ino_rec->ino_un.backptrs != NULL)  {
+               if (full_backptrs && ino_rec->ino_un.backptrs->parents != NULL)
+                       free(ino_rec->ino_un.backptrs->parents);
+               if (ino_rec->ino_un.plist != NULL)
+                       free(ino_rec->ino_un.plist);
+       }
+
+       return;
+}
+
+/*
+ * last referenced cache for uncertain inodes
+ */
+static ino_tree_node_t **last_rec;
+
+/*
+ * The uncertain inodes live in a set of trees just like the good
+ * inodes, except that every record is (arbitrarily) aligned on an
+ * XFS_INODES_PER_CHUNK boundary so records can never overlap.  That
+ * means the tree may hold partial records, i.e. records in which
+ * fewer than XFS_INODES_PER_CHUNK inodes were actually reported.
+ *
+ * Marks inode ino of AG agno in the uncertain tree, creating the
+ * containing record if necessary.  free is 1 if the inode is thought
+ * to be free, 0 if used.  The record touched is remembered in
+ * last_rec[agno] so that runs of nearby inodes skip the tree lookup.
+ */
+void
+add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free)
+{
+       ino_tree_node_t         *rec;
+       xfs_agino_t             chunk_start;
+       int                     cache_hit;
+
+       ASSERT(agno < glob_agcount);
+       ASSERT(last_rec != NULL);
+
+       chunk_start = rounddown(ino, XFS_INODES_PER_CHUNK);
+
+       rec = last_rec[agno];
+       cache_hit = (rec != NULL && rec->ino_startnum == chunk_start);
+
+       if (!cache_hit)  {
+               /*
+                * look the chunk up in the tree, adding a new record
+                * for it if it isn't there yet
+                */
+               rec = (ino_tree_node_t *)
+                       avl_findrange(inode_uncertain_tree_ptrs[agno],
+                               chunk_start);
+
+               if (rec == NULL)  {
+                       rec = mk_ino_tree_nodes(chunk_start);
+                       rec->ino_startnum = chunk_start;
+
+                       if (avl_insert(inode_uncertain_tree_ptrs[agno],
+                                       (avlnode_t *) rec) == NULL)  {
+                               do_error("xfs_repair:  duplicate inode range\n");
+                       }
+               }
+       }
+
+       if (free)
+               set_inode_free(rec, ino - chunk_start);
+       else
+               set_inode_used(rec, ino - chunk_start);
+
+       /* remember the record for the next call */
+       if (!cache_hit)
+               last_rec[agno] = rec;
+}
+
+/*
+ * same as add_aginode_uncertain() but takes a filesystem-wide inode
+ * number; the mount structure is needed to split it into its AG
+ * number and AG-relative inode number.
+ */
+void
+add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free)
+{
+       xfs_agnumber_t  agno = XFS_INO_TO_AGNO(mp, ino);
+       xfs_agino_t     agino = XFS_INO_TO_AGINO(mp, ino);
+
+       add_aginode_uncertain(agno, agino, free);
+}
+
+/*
+ * pull the indicated inode record out of the uncertain inode tree
+ *
+ * The record is unlinked from inode_uncertain_tree_ptrs[agno] and its
+ * AVL linkage is cleared; the record itself is not freed and stays
+ * owned by the caller.
+ */
+void
+get_uncertain_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+       /* sanity-check the tree we are about to modify */
+       ASSERT(inode_uncertain_tree_ptrs != NULL);
+       ASSERT(inode_uncertain_tree_ptrs[agno] != NULL);
+
+       avl_delete(inode_uncertain_tree_ptrs[agno], &ino_rec->avl_node);
+
+       ino_rec->avl_node.avl_nextino = NULL;
+       ino_rec->avl_node.avl_forw = NULL;
+       ino_rec->avl_node.avl_back = NULL;
+}
+
+/*
+ * first in-order record of the AG's uncertain inode tree,
+ * NULL if the tree is empty
+ */
+ino_tree_node_t *
+findfirst_uncertain_inode_rec(xfs_agnumber_t agno)
+{
+       avltree_desc_t *tree = inode_uncertain_tree_ptrs[agno];
+
+       return((ino_tree_node_t *) tree->avl_firstino);
+}
+
+/*
+ * forget the last-referenced uncertain inode record for this AG
+ */
+void
+clear_uncertain_ino_cache(xfs_agnumber_t agno)
+{
+       last_rec[agno] = NULL;
+}
+
+
+/*
+ * next comes the inode trees.  One per ag.  AVL trees
+ * of inode records, each inode record tracking 64 inodes
+ */
+/*
+ * Create an inode tree record for the chunk starting at ino and
+ * insert it into the AG's inode tree.
+ *
+ * No check is made for an existing record before the new one is
+ * built; if avl_insert() rejects the record, do_error() is called.
+ * Callers are responsible for making sure the chunk is new.
+ *
+ * ino must be the start of an XFS_INODES_PER_CHUNK (64) inode chunk.
+ *
+ * Each inode lives in a 64-inode chunk, and the filesystem allocates
+ * MAX(64, inodes-per-block) inodes at a time -- i.e. several chunks
+ * at once when a block holds more than one chunk.  When a chunk spans
+ * more than one fs block instead, chunks can start on *any* block
+ * boundary, which cannot be validated at this level.  So we assume
+ * the caller knows what it is doing.
+ */
+static ino_tree_node_t *
+add_inode(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+       ino_tree_node_t *rec;
+       avlnode_t       *inserted;
+
+       rec = mk_ino_tree_nodes(ino);
+       rec->ino_startnum = ino;
+
+       inserted = avl_insert(inode_tree_ptrs[agno], (avlnode_t *) rec);
+       if (inserted == NULL)  {
+               do_error("xfs_repair:  duplicate inode range\n");
+       }
+
+       return(rec);
+}
+
+/*
+ * unlink the given record from the AG's inode tree and clear its
+ * AVL linkage.  The record is not freed.
+ */
+void
+get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+       avlnode_t *node = &ino_rec->avl_node;
+
+       ASSERT(inode_tree_ptrs != NULL);
+       ASSERT(inode_tree_ptrs[agno] != NULL);
+
+       avl_delete(inode_tree_ptrs[agno], node);
+
+       node->avl_nextino = NULL;
+       node->avl_forw = NULL;
+       node->avl_back = NULL;
+}
+
+/*
+ * give the inode record back to the free pool.
+ * agno is accepted for interface symmetry but is not used.
+ */
+/* ARGSUSED */
+void
+free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+       free_ino_tree_node(ino_rec);
+}
+
+/*
+ * look up the record whose inode range contains ino in the AG's
+ * inode tree.  Returns NULL when no record covers it.  The record
+ * stays in the tree -- the tree-based find routines never remove
+ * anything.
+ */
+ino_tree_node_t *
+find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+       avlnode_t *hit;
+
+       hit = avl_findrange(inode_tree_ptrs[agno], ino);
+
+       return((ino_tree_node_t *) hit);
+}
+
+/*
+ * range lookup in the AG's inode tree: *first and *last are cleared
+ * and then filled in by avl_findranges() for [start_ino, end_ino].
+ */
+void
+find_inode_rec_range(xfs_agnumber_t agno, xfs_agino_t start_ino,
+                       xfs_agino_t end_ino, ino_tree_node_t **first,
+                       ino_tree_node_t **last)
+{
+       *last = NULL;
+       *first = NULL;
+
+       avl_findranges(inode_tree_ptrs[agno], start_ino, end_ino,
+               (avlnode_t **) first, (avlnode_t **) last);
+}
+
+/*
+ * Add a new record starting at ino and mark that inode as used.
+ *
+ * ino must be properly aligned -- on a filesystem block boundary or
+ * an XFS_INODES_PER_CHUNK boundary, whichever alignment is larger.
+ * Nothing here verifies the alignment; the only way to detect a bad
+ * one is to see whether the chunk overlaps a chunk already in the
+ * tree, which is left to the insert done in add_inode().
+ */
+ino_tree_node_t *
+set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+       ino_tree_node_t *rec = add_inode(agno, ino);
+
+       ASSERT(rec != NULL);
+       ASSERT(ino >= rec->ino_startnum &&
+               ino - rec->ino_startnum < XFS_INODES_PER_CHUNK);
+
+       set_inode_used(rec, ino - rec->ino_startnum);
+
+       return(rec);
+}
+
+/*
+ * add a new record starting at ino and mark that inode as free.
+ * Counterpart of set_inode_used_alloc().
+ */
+ino_tree_node_t *
+set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+       ino_tree_node_t *rec = add_inode(agno, ino);
+
+       ASSERT(rec != NULL);
+       ASSERT(ino >= rec->ino_startnum &&
+               ino - rec->ino_startnum < XFS_INODES_PER_CHUNK);
+
+       set_inode_free(rec, ino - rec->ino_startnum);
+
+       return(rec);
+}
+
+/*
+ * first in-order record of the AG's inode tree, NULL if it is empty
+ */
+ino_tree_node_t *
+findfirst_inode_rec(xfs_agnumber_t agno)
+{
+       avltree_desc_t *tree = inode_tree_ptrs[agno];
+
+       return((ino_tree_node_t *) tree->avl_firstino);
+}
+
+/*
+ * Debugging aid: dump every record of one AG's inode tree to stderr.
+ * uncertain selects the uncertain tree (non-zero) or the good inode
+ * tree (zero).
+ *
+ * All output goes to stderr so the per-AG header stays in sequence
+ * with the records, and the variadic arguments are cast to exactly
+ * the types the format string asks for.
+ */
+void
+print_inode_list_int(xfs_agnumber_t agno, int uncertain)
+{
+       ino_tree_node_t *ino_rec;
+
+       if (!uncertain)  {
+               fprintf(stderr, "good inode list is --\n");
+               ino_rec = findfirst_inode_rec(agno);
+       } else  {
+               fprintf(stderr, "uncertain inode list is --\n");
+               ino_rec = findfirst_uncertain_inode_rec(agno);
+       }
+
+       if (ino_rec == NULL)  {
+               fprintf(stderr, "agno %d -- no inodes\n", agno);
+               return;
+       }
+
+       fprintf(stderr, "agno %d\n", agno);
+
+       while(ino_rec != NULL)  {
+               fprintf(stderr,
+       "\tptr = %p, start = 0x%x, free = 0x%llx, confirmed = 0x%llx\n",
+                       (void *) ino_rec,
+                       ino_rec->ino_startnum,
+                       (unsigned long long) ino_rec->ir_free,
+                       (unsigned long long) ino_rec->ino_confirmed);
+               ino_rec = next_ino_rec(ino_rec);
+       }
+}
+
+/*
+ * dump the AG's good inode tree (debugging aid)
+ */
+void
+print_inode_list(xfs_agnumber_t agno)
+{
+       int uncertain = 0;
+
+       print_inode_list_int(agno, uncertain);
+}
+
+/*
+ * dump the AG's uncertain inode tree (debugging aid)
+ */
+void
+print_uncertain_inode_list(xfs_agnumber_t agno)
+{
+       int uncertain = 1;
+
+       print_inode_list_int(agno, uncertain);
+}
+
+/*
+ * set parent -- use a bitmask and a packed array.  The bitmask
+ * indicates which inodes have an entry in the array.  An inode that
+ * is the Nth bit set in the mask is stored in the Nth location in
+ * the array where N starts at 0.
+ *
+ * irec    inode record holding the parent list (ino_un.plist)
+ * offset  index of the inode within the record,
+ *         0 .. XFS_INODES_PER_CHUNK - 1
+ * parent  parent inode number to record
+ *
+ * Three cases: no list yet (allocate a one-entry list), the inode
+ * already has an entry (overwrite it in place), or a new entry has to
+ * be spliced into the packed array (reallocate one entry larger).
+ * Allocation failures are reported through do_error().  Only valid
+ * before add_ino_backptrs() has switched to full backpointers.
+ *
+ * The mask bit is built from an unsigned 64-bit one: shifting a
+ * signed 1LL left by 63 is undefined behaviour.
+ */
+void
+set_inode_parent(ino_tree_node_t *irec, int offset, xfs_ino_t parent)
+{
+       int             i;
+       int             cnt;
+       int             target;
+       __uint64_t      bitmask;
+       __uint64_t      offset_bit;
+       parent_entry_t  *tmp;
+
+       ASSERT(full_backptrs == 0);
+       ASSERT(offset >= 0 && offset < XFS_INODES_PER_CHUNK);
+
+       offset_bit = (__uint64_t) 1 << offset;
+
+       /* case 1: first parent recorded for this record */
+       if (irec->ino_un.plist == NULL)  {
+               irec->ino_un.plist = malloc(sizeof(parent_list_t));
+               if (!irec->ino_un.plist)
+                       do_error("couldn't malloc parent list table\n");
+
+               irec->ino_un.plist->pmask = offset_bit;
+               irec->ino_un.plist->pentries =
+                       memalign(sizeof(xfs_ino_t), sizeof(parent_entry_t));
+               if (!irec->ino_un.plist->pentries)
+                       do_error("couldn't memalign pentries table\n");
+#ifdef DEBUG
+               irec->ino_un.plist->cnt = 1;
+#endif
+               irec->ino_un.plist->pentries[0] = parent;
+
+               return;
+       }
+
+       /* case 2: entry exists -- its slot is the number of lower bits set */
+       if (irec->ino_un.plist->pmask & offset_bit)  {
+               bitmask = 1;
+               target = 0;
+
+               for (i = 0; i < offset; i++)  {
+                       if (irec->ino_un.plist->pmask & bitmask)
+                               target++;
+                       bitmask <<= 1;
+               }
+#ifdef DEBUG
+               ASSERT(target < irec->ino_un.plist->cnt);
+#endif
+               irec->ino_un.plist->pentries[target] = parent;
+
+               return;
+       }
+
+       /*
+        * case 3: splice in a new entry.  cnt is the current number of
+        * entries, target the slot the new one has to occupy.
+        */
+       bitmask = 1;
+       cnt = target = 0;
+
+       for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+               if (irec->ino_un.plist->pmask & bitmask)  {
+                       cnt++;
+                       if (i < offset)
+                               target++;
+               }
+
+               bitmask <<= 1;
+       }
+
+#ifdef DEBUG
+       ASSERT(cnt == irec->ino_un.plist->cnt);
+#endif
+       ASSERT(cnt >= target);
+
+       tmp = memalign(sizeof(xfs_ino_t), (cnt + 1) * sizeof(parent_entry_t));
+       if (!tmp)
+               do_error("couldn't memalign pentries table\n");
+
+       /* entries below the new slot keep their position ... */
+       (void) bcopy(irec->ino_un.plist->pentries, tmp,
+                       target * sizeof(parent_entry_t));
+
+       /* ... entries at or above it move up by one */
+       if (cnt > target)
+               (void) bcopy(irec->ino_un.plist->pentries + target,
+                               tmp + target + 1,
+                               (cnt - target) * sizeof(parent_entry_t));
+
+       free(irec->ino_un.plist->pentries);
+
+       irec->ino_un.plist->pentries = tmp;
+
+#ifdef DEBUG
+       irec->ino_un.plist->cnt++;
+#endif
+       irec->ino_un.plist->pentries[target] = parent;
+       irec->ino_un.plist->pmask |= offset_bit;
+
+       return;
+}
+
+#if 0
+/*
+ * not needed for now since we don't set the parent info
+ * until phase 4 -- at which point we know that the directory
+ * inode won't be going away -- so we won't ever need to clear
+ * directory parent data that we set.
+ *
+ * Disabled stub: the body only asserts its preconditions and does
+ * not remove anything from the parent list.
+ */
+void
+clear_inode_parent(ino_tree_node_t *irec, int offset)
+{
+       ASSERT(full_backptrs == 0);
+       ASSERT(irec->ino_un.plist != NULL);
+
+       return;
+}
+#endif
+
+/*
+ * Look up the parent recorded for the inode at index offset of irec.
+ *
+ * The parent list is taken from ino_un.backptrs->parents once
+ * full_backptrs is set, and from ino_un.plist before that.  The
+ * entry for an inode sits at the slot given by the number of mask
+ * bits set below its own bit.
+ *
+ * Returns the parent inode number, or 0 if no parent was recorded --
+ * including the case where the record has no parent list at all.
+ */
+xfs_ino_t
+get_inode_parent(ino_tree_node_t *irec, int offset)
+{
+       __uint64_t      bitmask;
+       parent_list_t   *ptbl;
+       int             i;
+       int             target;
+
+       if (full_backptrs)
+               ptbl = irec->ino_un.backptrs->parents;
+       else
+               ptbl = irec->ino_un.plist;
+
+       /* no list means set_inode_parent() was never called on irec */
+       if (ptbl == NULL)
+               return(0LL);
+
+       /* unsigned one: 1LL << 63 would be undefined behaviour */
+       if (ptbl->pmask & ((__uint64_t) 1 << offset))  {
+               bitmask = 1;
+               target = 0;
+
+               for (i = 0; i < offset; i++)  {
+                       if (ptbl->pmask & bitmask)
+                               target++;
+                       bitmask <<= 1;
+               }
+#ifdef DEBUG
+               ASSERT(target < ptbl->cnt);
+#endif
+               return(ptbl->pentries[target]);
+       }
+
+       return(0LL);
+}
+
+/*
+ * code that deals with the inode descriptor appendages -- the back
+ * pointers, link counts and reached bits for phase 6 and phase 7.
+ */
+
+/*
+ * bump the inode's nlinks counter and set its reached bit.
+ * Requires backpointers to be attached to the record.
+ */
+void
+add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       backptrs_t *bp;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+       bp = ino_rec->ino_un.backptrs;
+       bp->nlinks[ino_offset]++;
+
+       XFS_INO_RCHD_SET_RCHD(ino_rec, ino_offset);
+
+       ASSERT(is_inode_reached(ino_rec, ino_offset));
+}
+
+/*
+ * value of XFS_INO_RCHD_IS_RCHD() for the inode.
+ * Requires backpointers to be attached to the record.
+ */
+int
+is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       int reached;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+       reached = XFS_INO_RCHD_IS_RCHD(ino_rec, ino_offset);
+
+       return(reached);
+}
+
+/*
+ * bump the inode's nlinks counter without touching its reached bit.
+ * Requires backpointers to be attached to the record.
+ */
+void
+add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       backptrs_t *bp;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+       bp = ino_rec->ino_un.backptrs;
+       bp->nlinks[ino_offset]++;
+}
+
+/*
+ * drop one from the inode's nlinks counter; when it hits zero the
+ * reached bit is cleared as well.  The counter must be non-zero.
+ */
+void
+drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       backptrs_t *bp;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+       ASSERT(ino_rec->ino_un.backptrs->nlinks[ino_offset] > 0);
+
+       bp = ino_rec->ino_un.backptrs;
+       bp->nlinks[ino_offset]--;
+
+       if (bp->nlinks[ino_offset] == 0)
+               XFS_INO_RCHD_CLR_RCHD(ino_rec, ino_offset);
+}
+
+/*
+ * non-zero if the inode's nlinks counter is above zero
+ */
+int
+is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       backptrs_t *bp;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+       bp = ino_rec->ino_un.backptrs;
+
+       return(bp->nlinks[ino_offset] > 0);
+}
+
+/*
+ * current value of the inode's nlinks counter
+ */
+__uint32_t
+num_inode_references(ino_tree_node_t *ino_rec, int ino_offset)
+{
+       backptrs_t *bp;
+
+       ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+       bp = ino_rec->ino_un.backptrs;
+
+       return(bp->nlinks[ino_offset]);
+}
+
+#if 0
+/*
+ * Disabled alternative to get_backptr(): hands out zeroed backptrs_t
+ * structures from arrays of BPTR_ALLOC_NUM entries instead of doing
+ * one malloc per structure.
+ *
+ * NOTE(review): bptrs_index is a static that starts at 0 while bptrs
+ * starts as NULL, so the first call would skip the allocation branch
+ * and trip ASSERT(bptrs != NULL) -- confirm before re-enabling.
+ */
+static backptrs_t      *bptrs;
+static int             bptrs_index;
+#define BPTR_ALLOC_NUM 1000
+
+backptrs_t *
+get_backptr(void)
+{
+       backptrs_t *bptr;
+
+       /* current batch used up -- allocate the next one */
+       if (bptrs_index == BPTR_ALLOC_NUM)  {
+               ASSERT(bptrs == NULL);
+
+               if ((bptrs = malloc(sizeof(backptrs_t[BPTR_ALLOC_NUM])))
+                               == NULL)  {
+                       do_error("couldn't malloc ino rec backptrs.\n");
+               }
+
+               bptrs_index = 0;
+       }
+
+       ASSERT(bptrs != NULL);
+
+       bptr = &bptrs[bptrs_index];
+       bptrs_index++;
+
+       /* last entry handed out: drop the batch pointer */
+       if (bptrs_index == BPTR_ALLOC_NUM)
+               bptrs = NULL;
+
+       bzero(bptr, sizeof(backptrs_t));
+
+       return(bptr);
+}
+#endif
+
+/*
+ * allocate one zero-filled backptrs_t.
+ * An allocation failure is reported through do_error().
+ */
+backptrs_t *
+get_backptr(void)
+{
+       backptrs_t *bp;
+
+       bp = malloc(sizeof(backptrs_t));
+       if (bp == NULL)
+               do_error("could not malloc back pointer table\n");
+
+       bzero(bp, sizeof(backptrs_t));
+
+       return(bp);
+}
+
+/*
+ * Attach a backptrs_t to every record of every AG's inode tree and
+ * then switch the file over to full backpointers.
+ *
+ * For each record the existing parent list is saved, a fresh zeroed
+ * backptrs_t from get_backptr() is stored in ino_un.backptrs, and the
+ * saved list (possibly NULL) becomes its parents member.
+ *
+ * NOTE(review): the XR_BCKPTR_DBG section indexes tmp->pentries[] by
+ * inode offset j, but set_inode_parent() stores entries packed (one
+ * slot per bit set in pmask) -- the debug walk looks like it can read
+ * past the array; verify before enabling.
+ */
+void
+add_ino_backptrs(xfs_mount_t *mp)
+{
+#ifdef XR_BCKPTR_DBG
+       xfs_ino_t ino;
+       int j, k;
+#endif /* XR_BCKPTR_DBG */
+       ino_tree_node_t *ino_rec;
+       parent_list_t *tmp;
+       xfs_agnumber_t i;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               ino_rec = findfirst_inode_rec(i);
+
+               while (ino_rec != NULL)  {
+                       /* save the list first: storing backptrs replaces it */
+                       tmp = ino_rec->ino_un.plist;
+                       ino_rec->ino_un.backptrs = get_backptr(); 
+                       ino_rec->ino_un.backptrs->parents = tmp;
+
+#ifdef XR_BCKPTR_DBG
+                       if (tmp != NULL)  {
+                               k = 0;
+                               for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+                                       ino = XFS_AGINO_TO_INO(mp, i,
+                                               ino_rec->ino_startnum + j);
+                                       if (ino == 25165846)  {
+                                               do_warn("THERE 1 !!!\n");
+                                       }
+                                       if (tmp->pentries[j] != 0)  {
+                                               k++;
+                                               do_warn(
+                                               "inode %llu - parent %llu\n",
+                                                       ino,
+                                                       tmp->pentries[j]);
+                                               if (ino == 25165846)  {
+                                                       do_warn("THERE!!!\n");
+                                               }
+                                       }
+                               }
+
+                               if (k != tmp->cnt)  {
+                                       do_warn(
+                                       "ERROR - count = %d, counted %d\n",
+                                               tmp->cnt, k);
+                               }
+                       }
+#endif /* XR_BCKPTR_DBG */
+                       ino_rec = next_ino_rec(ino_rec);
+               }
+       }
+
+       /* from here on get_inode_parent() reads backptrs->parents */
+       full_backptrs = 1;
+
+       return;
+}
+
+/*
+ * range-start key callback for the inode trees:
+ * the record's first inode number
+ */
+static __psunsigned_t
+avl_ino_start(avlnode_t *node)
+{
+       ino_tree_node_t *rec = (ino_tree_node_t *) node;
+
+       return((__psunsigned_t) rec->ino_startnum);
+}
+
+/*
+ * range-end key callback for the inode trees: first inode number
+ * plus XFS_INODES_PER_CHUNK, i.e. one past the record's last inode
+ */
+static __psunsigned_t
+avl_ino_end(avlnode_t *node)
+{
+       ino_tree_node_t *rec = (ino_tree_node_t *) node;
+
+       return((__psunsigned_t) (rec->ino_startnum + XFS_INODES_PER_CHUNK));
+}
+
+/*
+ * key callbacks shared by the good and the uncertain inode trees
+ * (see incore_ino_init())
+ */
+avlops_t avl_ino_tree_ops = {
+       avl_ino_start,
+       avl_ino_end
+};
+
+/*
+ * Set up all in-core inode tracking state: one good and one
+ * uncertain inode tree per AG, an empty inode record free list, a
+ * zeroed per-AG cache of the last uncertain record touched, and
+ * full_backptrs cleared.  Allocation failures go to do_error().
+ */
+void
+incore_ino_init(xfs_mount_t *mp)
+{
+       int agcount = mp->m_sb.sb_agcount;
+       int agno;
+
+       /* the two per-AG descriptor tables */
+       inode_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *));
+       if (inode_tree_ptrs == NULL)
+               do_error("couldn't malloc inode tree descriptor table\n");
+
+       inode_uncertain_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *));
+       if (inode_uncertain_tree_ptrs == NULL)
+               do_error("couldn't malloc uncertain ino tree descriptor table\n");
+
+       /* one descriptor of each kind per AG */
+       for (agno = 0; agno < agcount; agno++)  {
+               inode_tree_ptrs[agno] = malloc(sizeof(avltree_desc_t));
+               if (inode_tree_ptrs[agno] == NULL)
+                       do_error("couldn't malloc inode tree descriptor\n");
+
+               inode_uncertain_tree_ptrs[agno] =
+                               malloc(sizeof(avltree_desc_t));
+               if (inode_uncertain_tree_ptrs[agno] == NULL)
+                       do_error(
+                       "couldn't malloc uncertain ino tree descriptor\n");
+       }
+
+       for (agno = 0; agno < agcount; agno++)  {
+               avl_init_tree(inode_tree_ptrs[agno], &avl_ino_tree_ops);
+               avl_init_tree(inode_uncertain_tree_ptrs[agno],
+                               &avl_ino_tree_ops);
+       }
+
+       /* record free list starts out empty */
+       ino_flist.cnt = 0;
+       ino_flist.list = NULL;
+
+       /* uncertain inode cache, one slot per AG, all empty */
+       last_rec = malloc(sizeof(ino_tree_node_t *) * agcount);
+       if (last_rec == NULL)
+               do_error("couldn't malloc uncertain inode cache area\n");
+
+       bzero(last_rec, sizeof(ino_tree_node_t *) * agcount);
+
+       full_backptrs = 0;
+}
+
+#ifdef XR_INO_REF_DEBUG
+/*
+ * debug only: apply XFS_INOPROC_SET_PROC() to the inode and verify
+ * that is_inode_refchecked() now reports it.  ino is not used.
+ */
+void
+add_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+       XFS_INOPROC_SET_PROC((ino_rec), (ino_offset));
+
+       ASSERT(is_inode_refchecked(ino, ino_rec, ino_offset));
+}
+
+/*
+ * debug only: 1 if XFS_INOPROC_IS_PROC() is non-zero for the inode,
+ * 0 otherwise.  ino is not used.
+ */
+int
+is_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+       if (XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL)
+               return(0);
+
+       return(1);
+}
+#endif /* XR_INO_REF_DEBUG */
diff --git a/repair/init.c b/repair/init.c
new file mode 100644 (file)
index 0000000..cc61650
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * Populate *args from the repair globals (isa_file, fs_name, log_spec,
+ * log_name, no_modify) and pass it to libxfs_init().  A zero return
+ * from libxfs_init() is reported through do_error().
+ */
+void
+xfs_init(libxfs_init_t *args)
+{
+       memset(args, 0, sizeof(*args));
+
+       /*
+        * a filesystem image in a regular file is handed over as the
+        * data name, anything else as the volume name
+        */
+       args->disfile = isa_file ? 1 : 0;
+       args->dname = isa_file ? fs_name : NULL;
+       args->volname = isa_file ? NULL : fs_name;
+
+       if (log_spec)  {
+               /*
+                * External log specified.
+                * XXX assume data file also means log file
+                * REVISIT: Need to do fs sanity / log validity checking
+                */
+               args->lisfile = isa_file ? 1 : 0;
+               args->logname = log_name;
+       }
+
+       args->notvolok = 1;
+       args->notvolmsg = "you should never get this message - %s";
+
+       if (no_modify)
+               args->isreadonly = LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE;
+
+       if (libxfs_init(args) == 0)
+               do_error("couldn't initialize XFS library\n");
+}
diff --git a/repair/io.c b/repair/io.c
new file mode 100644 (file)
index 0000000..0b400ce
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <fcntl.h>
+#include <libxfs.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * Open the filesystem named by fs_name read/write into fs_fd, pick the
+ * sizes of the global i/o buffers and allocate them (iobuf, smallbuf
+ * and the NUM_SBS entries of sb_bufs).  Every failure is reported
+ * through do_error().
+ */
+void
+io_init(void)
+{
+       int     idx;
+
+       ASSERT(fs_name != NULL && *fs_name != '\0');
+
+       /* open up filesystem device */
+       fs_fd = open(fs_name, O_RDWR);
+       if (fs_fd < 0)
+               do_error("couldn't open filesystem \"%s\"\n", fs_name);
+
+       /* initialize i/o buffers */
+       iobuf_size = 1000 * 1024;
+       smallbuf_size = 4 * 4096;       /* enough for an ag */
+
+       /*
+        * sbbuf_size must be < XFS_MIN_AG_BLOCKS (64) * smallest block size,
+        * otherwise you might get an EOF when reading in the sb/agf from
+        * the last ag if that ag is small
+        */
+       sbbuf_size = 2 * 4096;          /* 2 * max sector size */
+
+       iobuf = malloc(iobuf_size);
+       if (iobuf == NULL)
+               do_error("couldn't malloc io buffer\n");
+
+       smallbuf = malloc(smallbuf_size);
+       if (smallbuf == NULL)
+               do_error("couldn't malloc secondary io buffer\n");
+
+       for (idx = 0; idx < NUM_SBS; idx++)  {
+               sb_bufs[idx] = malloc(sbbuf_size);
+               if (sb_bufs[idx] == NULL)
+                       do_error("couldn't malloc sb io buffers\n");
+       }
+}
diff --git a/repair/phase1.c b/repair/phase1.c
new file mode 100644 (file)
index 0000000..a3bc895
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <malloc.h>
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * Called when no valid secondary superblock could be found: emits two
+ * warnings and terminates the program with exit status 1.  Does not
+ * return.
+ */
+void
+no_sb(void)
+{
+       do_warn("Sorry, could not find valid secondary superblock\n");
+       do_warn("Exiting now.\n");
+       exit(1);
+}
+
+/*
+ * Allocate a size-byte buffer aligned to MEM_ALIGN using memalign().
+ * An allocation failure is reported through do_error(); the pointer
+ * obtained from memalign() is what gets returned.
+ */
+char *
+alloc_ag_buf(int size)
+{
+       char    *buf = (char *)memalign(MEM_ALIGN, size);
+
+       if (buf == NULL)
+               do_error("could not allocate ag header buffer (%d bytes)\n",
+                       size);
+
+       return(buf);
+}
+
+/*
+ * Size in bytes of the buffer that phase1() allocates and passes to
+ * get_sb() when reading the primary superblock.
+ * this has got to be big enough to hold 4 sectors
+ */
+#define MAX_SECTSIZE           (512 * 1024)
+
+/*
+ * Phase 1: find and verify the primary superblock.
+ *
+ * Sets up i/o via io_init(), resets the per-run global flags, reads the
+ * primary superblock into a freshly allocated buffer and checks it with
+ * verify_sb() and, if that passes, verify_set_primary_sb().  When either
+ * check fails a secondary superblock is searched for; if none is found
+ * no_sb() terminates the program.  A modified primary superblock is
+ * written back unless no_modify is set.  The mp argument is unused.
+ */
+/* ARGSUSED */
+void
+phase1(xfs_mount_t *mp)
+{
+       char                    *ag_bp;
+       xfs_sb_t                *sb;
+       int                     rval;
+       int                     need_secondary = 0;
+
+       io_init();
+
+       do_log("Phase 1 - find and verify superblock...\n");
+
+       primary_sb_modified = 0;
+       need_root_inode = 0;
+       need_root_dotdot = 0;
+       need_rbmino = 0;
+       need_rsumino = 0;
+       lost_quotas = 0;
+       old_orphanage_ino = (xfs_ino_t) 0;
+
+       /*
+        * get AG 0 into ag header buf
+        */
+       ag_bp = alloc_ag_buf(MAX_SECTSIZE);
+       sb = (xfs_sb_t *) ag_bp;
+
+       if (get_sb(sb, 0LL, MAX_SECTSIZE, 0) == XR_EOF)
+               do_error("error reading primary superblock\n");
+
+       /*
+        * is this really an sb, verify internal consistency
+        */
+       rval = verify_sb(sb, 1);
+       if (rval != XR_OK)  {
+               do_warn("bad primary superblock - %s !!!\n",
+                       err_string(rval));
+               need_secondary = 1;
+       } else  {
+               rval = verify_set_primary_sb(sb, 0, &primary_sb_modified);
+               if (rval != XR_OK)  {
+                       do_warn(
+                       "couldn't verify primary superblock - %s !!!\n",
+                               err_string(rval));
+                       need_secondary = 1;
+               }
+       }
+
+       /*
+        * the primary is unusable, fall back to a secondary copy or
+        * give up
+        */
+       if (need_secondary)  {
+               if (!find_secondary_sb(sb))
+                       no_sb();
+               primary_sb_modified = 1;
+       }
+
+       if (primary_sb_modified)  {
+               if (no_modify)  {
+                       do_warn("would write modified primary superblock\n");
+               } else  {
+                       do_warn("writing modified primary superblock\n");
+                       write_primary_sb(sb, sb->sb_sectsize);
+               }
+       }
+
+       /*
+        * misc. global var initialization
+        */
+       sb_ifree = sb_icount = sb_fdblocks = sb_frextents = 0;
+
+       /* sb aliases ag_bp, so this releases the superblock buffer */
+       free(ag_bp);
+}
diff --git a/repair/phase2.c b/repair/phase2.c
new file mode 100644 (file)
index 0000000..a906892
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "incore.h"
+
+void   set_mp(xfs_mount_t *mpp);
+void   scan_ag(xfs_agnumber_t agno);
+
+/*
+ * Clear the log with libxfs_log_clear().  When sb_logstart is zero the
+ * log device recorded in args is used, otherwise the data device.  The
+ * start address and length come from sb_logstart and sb_logblocks.
+ */
+static void
+zero_log(xfs_mount_t *mp, libxfs_init_t *args)
+{
+       int     logdev;
+
+       if (mp->m_sb.sb_logstart == 0)
+               logdev = args->logdev;
+       else
+               logdev = args->ddev;
+
+       libxfs_log_clear(logdev,
+               XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+               (xfs_extlen_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
+               &mp->m_sb.sb_uuid,
+               XLOG_FMT);
+}
+
+/*
+ * ok, at this point, the fs is mounted but the root inode may be
+ * trashed and the ag headers haven't been checked.  So we have
+ * a valid xfs_mount_t and superblock but that's about it.  That
+ * means we can use macros that use mount/sb fields in calculations
+ * but I/O or btree routines that depend on space maps or inode maps
+ * being correct are verboten.
+ */
+
+/*
+ * Phase 2: report whether the log is internal or external (exiting if
+ * an external log is required but none was named in args), zero the
+ * log unless no_modify is set, scan every AG with scan_ag(), and then
+ * make sure the incore inode records account for the root inode and
+ * the two inodes following it in its chunk.
+ */
+void
+phase2(xfs_mount_t *mp, libxfs_init_t *args)
+{
+       xfs_agnumber_t          i;
+       xfs_agblock_t           b;
+       int                     j;
+       ino_tree_node_t         *ino_rec;
+
+       /* now we can start using the buffer cache routines */
+       set_mp(mp);
+
+       /* Check whether this fs has internal or external log */
+       /* sb_logstart == 0 is treated as "the log is external" */
+       if (mp->m_sb.sb_logstart == 0) {
+               if (!args->logname) {
+                       fprintf (stderr,
+                               "This filesystem has an external log.  "
+                               "Specify log device with the -l option.\n");
+                       exit (1);
+               }
+               
+               fprintf (stderr, "Phase 2 - using external log on %s\n", 
+                        args->logname);
+       } else
+               fprintf (stderr, "Phase 2 - using internal log\n");
+
+       /* Zero log if applicable */
+       if (!no_modify)  {
+               do_log("        - zero log...\n");
+               zero_log(mp, args);
+       }
+
+       do_log("        - scan filesystem freespace and inode maps...\n");
+
+       /*
+        * account for space used by ag headers and log if internal
+        */
+       set_bmap_log(mp);
+       set_bmap_fs(mp);
+
+       bad_ino_btree = 0;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               scan_ag(i);
+#ifdef XR_INODE_TRACE
+               print_inode_list(i);
+#endif
+       }
+
+       /*
+        * make sure we know about the root inode chunk
+        */
+       if ((ino_rec = find_inode_rec(0, mp->m_sb.sb_rootino)) == NULL)  {
+               /*
+                * the rebuild below relies on the realtime bitmap and
+                * summary inodes directly following the root inode
+                */
+               ASSERT(mp->m_sb.sb_rbmino == mp->m_sb.sb_rootino + 1 &&
+                       mp->m_sb.sb_rsumino == mp->m_sb.sb_rootino + 2);
+               do_warn("root inode chunk not found\n");
+
+               /*
+                * mark the first 3 used, the rest are free
+                */
+               /*
+                * NOTE(review): presumably set_inode_used_alloc() creates
+                * the record and marks offset 0 used - confirm in incore
+                */
+               ino_rec = set_inode_used_alloc(0,
+                               (xfs_agino_t) mp->m_sb.sb_rootino);
+               set_inode_used(ino_rec, 1);
+               set_inode_used(ino_rec, 2);
+
+               for (j = 3; j < XFS_INODES_PER_CHUNK; j++)
+                       set_inode_free(ino_rec, j);
+
+               /*
+                * also mark blocks
+                */
+               /* m_ialloc_blks blocks starting at the root inode's block */
+               for (b = 0; b < mp->m_ialloc_blks; b++)  {
+                       set_agbno_state(mp, 0,
+                               b + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
+                               XR_E_INO);
+               }
+       } else  {
+               do_log("        - found root inode chunk\n");
+
+               /*
+                * blocks are marked, just make sure they're in use
+                */
+               /*
+                * the incore record is updated in both modes; only the
+                * wording of the message depends on no_modify
+                */
+               if (is_inode_free(ino_rec, 0))  {
+                       do_warn("root inode marked free, ");
+                       set_inode_used(ino_rec, 0);
+                       if (!no_modify)
+                               do_warn("correcting\n");
+                       else
+                               do_warn("would correct\n");
+               }
+
+               if (is_inode_free(ino_rec, 1))  {
+                       do_warn("realtime bitmap inode marked free, ");
+                       set_inode_used(ino_rec, 1);
+                       if (!no_modify)
+                               do_warn("correcting\n");
+                       else
+                               do_warn("would correct\n");
+               }
+
+               if (is_inode_free(ino_rec, 2))  {
+                       do_warn("realtime summary inode marked free, ");
+                       set_inode_used(ino_rec, 2);
+                       if (!no_modify)
+                               do_warn("correcting\n");
+                       else
+                               do_warn("would correct\n");
+               }
+       }
+}
diff --git a/repair/phase3.c b/repair/phase3.c
new file mode 100644 (file)
index 0000000..e9f7af5
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+
+/*
+ * walks an unlinked list, returns 1 on an error (bogus pointer) or
+ * I/O error
+ *
+ * Starting at start_ino, each inode on the list is checked with
+ * verify_aginum() and read with get_agino_buf(); a failure of either
+ * returns 1.  An inode that passes verify_dinode() but has no incore
+ * record is added to the uncertain inode list and its block is marked
+ * XR_E_INO.  The walk stops at NULLAGINO or at the first inode that
+ * fails verify_dinode(), and then returns 0.
+ *
+ * NOTE(review): nothing here detects a list that loops back on itself;
+ * confirm whether a caller or verify_dinode() guards against that.
+ */
+int
+walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
+{
+       xfs_buf_t *bp;
+       xfs_dinode_t *dip;
+       xfs_agino_t current_ino = start_ino;
+       xfs_agblock_t agbno;
+       int state;
+
+       while (current_ino != NULLAGINO)  {
+               if (!verify_aginum(mp, agno, current_ino))
+                       return(1);
+               if ((bp = get_agino_buf(mp, agno, current_ino, &dip)) == NULL)
+                       return(1);
+               /*
+                * if this looks like a decent inode, then continue
+                * following the unlinked pointers.  If not, bail.
+                */
+               if (verify_dinode(mp, dip, agno, current_ino) == 0)  {
+                       /*
+                        * check if the unlinked list points to an unknown
+                        * inode.  if so, put it on the uncertain inode list
+                        * and set block map appropriately.
+                        */
+                       if (find_inode_rec(agno, current_ino) == NULL)  {
+                               add_aginode_uncertain(agno, current_ino, 1);
+                               agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
+
+                               switch (state = get_agbno_state(mp,
+                                                       agno, agbno))  {
+                               case XR_E_UNKNOWN:
+                               case XR_E_FREE:
+                               case XR_E_FREE1:
+                                       set_agbno_state(mp, agno, agbno,
+                                               XR_E_INO);
+                                       break;
+                               case XR_E_BAD_STATE:
+                                       do_error(
+                                               "bad state in block map %d\n",
+                                               state);
+                                       abort();
+                                       break;
+                               default:
+                                       /*
+                                        * the block looks like inodes
+                                        * so be conservative and try
+                                        * to scavenge what's in there.
+                                        * if what's there is completely
+                                        * bogus, it'll show up later
+                                        * and the inode will be trashed
+                                        * anyway, hopefully without
+                                        * losing too much other data
+                                        */
+                                       set_agbno_state(mp, agno, agbno,
+                                               XR_E_INO);
+                                       break;
+                               }
+                       }
+                       /*
+                        * dip points into the on-disk inode buffer, so the
+                        * link has to be converted to host byte order just
+                        * like the agi_unlinked[] bucket heads are
+                        */
+                       current_ino = INT_GET(dip->di_next_unlinked,
+                                               ARCH_CONVERT);
+               } else  {
+                       current_ino = NULLAGINO;
+               }
+               libxfs_putbuf(bp);
+       }
+
+       return(0);
+}
+
+/*
+ * Read the AGI of allocation group agno and walk each non-empty
+ * unlinked bucket with walk_unlinked_list().  Unless no_modify is set,
+ * every walked bucket is reset to NULLAGINO and the AGI buffer is
+ * written back; otherwise the buffer is just released.  A single
+ * warning is issued if any walk reported an error.
+ */
+void
+process_agi_unlinked(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       xfs_buf_t       *agi_bp;
+       xfs_agi_t       *agi;
+       xfs_agino_t     head;
+       int             bucket;
+       int             nerrors = 0;
+       int             dirty = 0;
+
+       agi_bp = libxfs_readbuf(mp->m_dev,
+                               XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR),
+                               mp->m_sb.sb_sectsize/BBSIZE, 0);
+       if (agi_bp == NULL) {
+               do_error("cannot read agi block %lld for ag %u\n",
+                       XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), agno);
+               exit(1);
+       }
+
+       agi = XFS_BUF_TO_AGI(agi_bp);
+
+       ASSERT(no_modify || INT_GET(agi->agi_seqno, ARCH_CONVERT) == agno);
+
+       for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)  {
+               head = INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT);
+               if (head == NULLAGINO)
+                       continue;
+
+               nerrors += walk_unlinked_list(mp, agno, head);
+
+               /*
+                * clear the list
+                */
+               if (no_modify)
+                       continue;
+
+               INT_SET(agi->agi_unlinked[bucket], ARCH_CONVERT, NULLAGINO);
+               dirty = 1;
+       }
+
+       if (nerrors)
+               do_warn("error following ag %d unlinked list\n", agno);
+
+       /* the buffer can only have been dirtied when modifying is allowed */
+       ASSERT(!dirty || !no_modify);
+
+       if (dirty && !no_modify)
+               libxfs_writebuf(agi_bp, 0);
+       else
+               libxfs_putbuf(agi_bp);
+}
+
+void
+phase3(xfs_mount_t *mp)
+{
+       int i, j;
+
+       printf("Phase 3 - for each AG...\n");
+       if (!no_modify)
+               printf("        - scan and clear agi unlinked lists...\n");
+       else
+               printf("        - scan (but don't clear) agi unlinked lists...\n");
+
+       /*
+        * first, let's look at the possibly bogus inodes
+        */
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /*
+                * walk unlinked list to add more potential inodes to list
+                */
+               process_agi_unlinked(mp, i);
+               check_uncertain_aginodes(mp, i);
+       }
+
+       /* ok, now that the tree's ok, let's take a good look */
+
+       printf(
+           "        - process known inodes and perform inode discovery...\n");
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               do_log("        - agno = %d\n", i);
+               /*
+                * turn on directory processing (inode discovery) and 
+                * attribute processing (extra_attr_check)
+                */
+               process_aginodes(mp, i, 1, 0, 1);
+       }
+
+       /*
+        * process newly discovered inode chunks
+        */
+       printf("        - process newly discovered inodes...\n");
+       do  {
+               /*
+                * have to loop until no ag has any uncertain
+                * inodes
+                */
+               j = 0;
+               for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+                       j += process_uncertain_aginodes(mp, i);
+#ifdef XR_INODE_TRACE
+                       fprintf(stderr,
+                               "\t\t phase 3 - process_uncertain_inodes returns %d\n", j);
+#endif
+               }
+       } while (j != 0);
+}
+
diff --git a/repair/phase4.c b/repair/phase4.c
new file mode 100644 (file)
index 0000000..d3e0bd1
--- /dev/null
@@ -0,0 +1,1337 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "bmap.h"
+#include "versions.h"
+#include "dir2.h"
+
+
+/*
+ * Scan one old-format (xfs_dir_leafblock_t) directory leaf block for
+ * entries named ORPHANAGE.
+ *
+ * For each match the inode number is recorded in old_orphanage_ino.  If
+ * the inode is found in the incore inode tree, the on-disk inode is
+ * cleared and the inode is marked free incore (and no longer a
+ * directory).  The first byte of the entry's name is then overwritten
+ * with '/' to mark the entry as junk.
+ *
+ * mp          - mount structure
+ * ino         - directory inode number (unused)
+ * leaf        - leaf block to scan, modified in place
+ * dirty       - set to 1 if the leaf block was modified, else 0
+ * rootino_bp  - buffer holding the root inode, used instead of reading
+ *               a new buffer when the orphanage inode lives in it
+ * rbuf_dirty  - set to 1 if an inode in rootino_bp was modified; it is
+ *               never cleared here
+ *
+ * Returns the number of entries marked for deletion.
+ */
+/* ARGSUSED */
+int
+lf_block_delete_orphanage(xfs_mount_t          *mp,
+                       xfs_ino_t               ino,
+                       xfs_dir_leafblock_t     *leaf,
+                       int                     *dirty,
+                       xfs_buf_t               *rootino_bp,
+                       int                     *rbuf_dirty)
+{
+       xfs_dir_leaf_entry_t    *entry;
+       xfs_dinode_t            *dino;
+       xfs_buf_t               *bp;
+       ino_tree_node_t         *irec;
+       xfs_ino_t               lino;
+       xfs_dir_leaf_name_t     *namest;
+       xfs_agino_t             agino;
+       xfs_agnumber_t          agno;
+       xfs_agino_t             root_agino;
+       xfs_agnumber_t          root_agno;
+       int                     i;
+       int                     ino_offset;
+       int                     ino_dirty;
+       int                     use_rbuf;
+       int                     len;
+       char                    fname[MAXNAMELEN + 1];
+       int                     res;
+
+       entry = &leaf->entries[0];
+       *dirty = 0;
+       use_rbuf = 0;
+       res = 0;
+       root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+       root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+       for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) {
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+                       INT_GET(entry->nameidx, ARCH_CONVERT));
+               XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+               /* make a NUL-terminated copy of the name for strcmp */
+               bcopy(namest->name, fname, entry->namelen);
+               fname[entry->namelen] = '\0';
+
+               if (fname[0] != '/' && !strcmp(fname, ORPHANAGE))  {
+                       agino = XFS_INO_TO_AGINO(mp, lino);
+                       agno = XFS_INO_TO_AGNO(mp, lino);
+
+                       old_orphanage_ino = lino;
+
+                       irec = find_inode_rec(agno, agino);
+
+                       /*
+                        * if the orphanage inode is in the tree,
+                        * get it, clear it, and mark it free.
+                        * the inodes in the orphanage will get
+                        * reattached to the new orphanage.
+                        */
+                       if (irec != NULL)  {
+                               ino_offset = agino - irec->ino_startnum;
+
+                               /*
+                                * check if we have to use the root inode
+                                * buffer or read one in ourselves.  Note
+                                * that the root inode is always the first
+                                * inode of the chunk that it's in so there
+                                * are two possible cases where lost+found
+                                * might be in the same buffer as the root
+                                * inode.  One case is a large block
+                                * filesystem where the two inodes are
+                                * in different inode chunks but wind
+                                * up in the same block (multiple chunks
+                                * per block) and the second case (one or
+                                * more blocks per chunk) is where the two
+                                * inodes are in the same chunk. Note that
+                                * inodes are allocated on disk in units
+                                * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+                                */
+                               if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+                                               == XFS_INO_TO_FSB(mp, lino) ||
+                                   (agno == root_agno &&
+                                    agino < root_agino + XFS_INODES_PER_CHUNK)) {
+                                       use_rbuf = 1;
+                                       bp = rootino_bp;
+                                       dino = XFS_MAKE_IPTR(mp, bp, agino -
+                                               XFS_INO_TO_AGINO(mp,
+                                                       mp->m_sb.sb_rootino));
+                               } else {
+                                       /*
+                                        * this inode gets its own buffer.
+                                        * reset the flag so that an earlier
+                                        * matching entry that lived in the
+                                        * root inode buffer cannot make us
+                                        * skip the write/release of this
+                                        * buffer below.
+                                        */
+                                       use_rbuf = 0;
+                                       len = (int)XFS_FSB_TO_BB(mp,
+                                               MAX(1, XFS_INODES_PER_CHUNK/
+                                                       inodes_per_block));
+                                       bp = libxfs_readbuf(mp->m_dev,
+                                               XFS_AGB_TO_DADDR(mp, agno,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                               irec->ino_startnum)),
+                                               len, 0);
+                                       if (!bp)
+                                               do_error("couldn't read %s inode %llu\n",
+                                                       ORPHANAGE, lino);
+
+                                       /*
+                                        * get the agbno containing the first
+                                        * inode in the chunk.  In multi-block
+                                        * chunks, this gets us the offset
+                                        * relative to the beginning of a
+                                        * properly aligned buffer.  In
+                                        * multi-chunk blocks, this gets us
+                                        * the correct block number.  Then
+                                        * turn the block number back into
+                                        * an agino and calculate the offset
+                                        * from there to feed to make the iptr.
+                                        * the last term in effect rounds down
+                                        * to the first agino in the buffer.
+                                        */
+                                       dino = XFS_MAKE_IPTR(mp, bp,
+                                               agino - XFS_OFFBNO_TO_AGINO(mp,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                       irec->ino_startnum),
+                                                       0));
+                               }
+
+                               do_warn("        - clearing existing \"%s\" inode\n",
+                                       ORPHANAGE);
+
+                               ino_dirty = clear_dinode(mp, dino, lino);
+
+                               /*
+                                * a buffer we read ourselves is written
+                                * or released right here; for the root
+                                * inode buffer we only report dirtiness
+                                * back through rbuf_dirty.
+                                */
+                               if (!use_rbuf)  {
+                                       ASSERT(ino_dirty == 0 ||
+                                               ino_dirty && !no_modify);
+
+                                       if (ino_dirty && !no_modify)
+                                               libxfs_writebuf(bp, 0);
+                                       else
+                                               libxfs_putbuf(bp);
+                               } else  {
+                                       if (ino_dirty)
+                                               *rbuf_dirty = 1;
+                               }
+
+                               if (inode_isadir(irec, ino_offset))
+                                       clear_inode_isadir(irec, ino_offset);
+
+                               set_inode_free(irec, ino_offset);
+                       }
+
+                       /*
+                        * regardless of whether the inode num is good or
+                        * bad, mark the entry to be junked so the
+                        * createname in phase 6 will succeed.
+                        */
+                       namest->name[0] = '/';
+                       *dirty = 1;
+                       do_warn("        - marking entry \"%s\" to be deleted\n", fname);
+                       res++;
+               }
+       }
+
+       return(res);
+}
+
+/*
+ * Walk the chain of leaf blocks of an old-format longform directory and
+ * hand each one to lf_block_delete_orphanage(), so that every
+ * "lost+found" entry (duplicates included) gets marked for deletion.
+ *
+ * The walk starts at the block returned by get_first_dblock_fsbno() and
+ * follows each leaf header's forward pointer until that pointer is 0.
+ * No hash lookup is attempted; the blocks are simply visited in chain
+ * order.
+ *
+ * *rbuf_dirty is reset to 0 on entry and is passed through to
+ * lf_block_delete_orphanage() together with rootino_bp.
+ *
+ * Returns the sum of the per-block results.
+ *
+ * NOTE(review): a failed mapping of a later block (get_bmapi returning
+ * NULLDFSBNO) is only caught by the ASSERT at the top of the loop.
+ */
+int
+longform_delete_orphanage(xfs_mount_t  *mp,
+                       xfs_ino_t       ino,
+                       xfs_dinode_t    *dino,
+                       xfs_buf_t       *rootino_bp,
+                       int             *rbuf_dirty)
+{
+       xfs_dir_leafblock_t     *leafblk;
+       xfs_buf_t               *leaf_bp;
+       xfs_dfsbno_t            fsbno;
+       xfs_dablk_t             da_bno = 0;
+       int                     dirty;
+       int                     ndeleted = 0;
+
+       *rbuf_dirty = 0;
+
+       fsbno = get_first_dblock_fsbno(mp, ino, dino);
+       if (fsbno == NULLDFSBNO)  {
+               do_error("couldn't map first leaf block of directory inode %llu\n", ino);
+               exit(1);
+       }
+
+       for (;;) {
+               ASSERT(fsbno != NULLDFSBNO);
+               leaf_bp = libxfs_readbuf(mp->m_dev,
+                                       XFS_FSB_TO_DADDR(mp, fsbno),
+                                       XFS_FSB_TO_BB(mp, 1), 0);
+               if (leaf_bp == NULL) {
+                       do_error("can't read block %u (fsbno %llu) for directory inode "
+                               "%llu\n", da_bno, fsbno, ino);
+                       exit(1);
+               }
+
+               leafblk = (xfs_dir_leafblock_t *)XFS_BUF_PTR(leaf_bp);
+
+               if (INT_GET(leafblk->hdr.info.magic, ARCH_CONVERT) !=
+                               XFS_DIR_LEAF_MAGIC) {
+                       do_error("bad magic # (0x%x) for directory leaf block "
+                               "(bno %u fsbno %llu)\n",
+                               INT_GET(leafblk->hdr.info.magic, ARCH_CONVERT),
+                               da_bno, fsbno);
+                       exit(1);
+               }
+
+               /* pick up the forward link before the buffer is let go */
+               da_bno = INT_GET(leafblk->hdr.info.forw, ARCH_CONVERT);
+
+               ndeleted += lf_block_delete_orphanage(mp, ino, leafblk,
+                                       &dirty, rootino_bp, rbuf_dirty);
+
+               ASSERT(dirty == 0 || dirty && !no_modify);
+
+               /* only a modified block in modify mode goes back to disk */
+               if (dirty == 0 || no_modify)
+                       libxfs_putbuf(leaf_bp);
+               else
+                       libxfs_writebuf(leaf_bp, 0);
+
+               /* a forward pointer of 0 ends the chain */
+               if (da_bno == 0)
+                       break;
+
+               fsbno = get_bmapi(mp, dino, ino, da_bno, XFS_DATA_FORK);
+       }
+
+       return(ndeleted);
+}
+
+/*
+ * Remove every ORPHANAGE entry from the shortform directory stored
+ * inline in root_dino.
+ *
+ * For each matching entry whose inode is found in the incore inode
+ * tree, the on-disk inode is cleared and the inode is marked free
+ * incore (and no longer a directory).  The entry itself is then cut out
+ * of the shortform list: the bytes that follow it are moved down over
+ * it, the vacated tail is zeroed, and the directory size and entry
+ * count are decremented.
+ *
+ * mp          - mount structure
+ * ino         - directory inode number (unused)
+ * root_dino   - on-disk directory inode, modified in place
+ * rootino_bp  - buffer holding the root inode, used instead of reading
+ *               a new buffer when the orphanage inode lives in it
+ * ino_dirty   - set to 1 if root_dino was modified, else 0
+ *
+ * Returns the number of entries deleted (0 if there were none).
+ *
+ * NOTE(review): unlike the leaf-block variants in this file, this
+ * routine does not record the inode number in old_orphanage_ino;
+ * confirm that is intended.
+ */
+/* ARGSUSED */
+int
+shortform_delete_orphanage(xfs_mount_t *mp,
+                       xfs_ino_t       ino,
+                       xfs_dinode_t    *root_dino,
+                       xfs_buf_t       *rootino_bp,
+                       int             *ino_dirty)
+{
+       xfs_dir_shortform_t     *sf;
+       xfs_dinode_t            *dino;
+       xfs_dir_sf_entry_t      *sf_entry, *next_sfe, *tmp_sfe;
+       xfs_buf_t               *bp;
+       xfs_ino_t               lino;
+       xfs_agino_t             agino;
+       xfs_agino_t             root_agino;
+       int                     max_size;
+       xfs_agnumber_t          agno;
+       xfs_agnumber_t          root_agno;
+       int                     ino_dir_size;
+       ino_tree_node_t         *irec;
+       int                     ino_offset;
+       int                     i;
+       int                     dirty;
+       int                     tmp_len;
+       int                     tmp_elen;
+       int                     len;
+       int                     use_rbuf;
+       char                    fname[MAXNAMELEN + 1];
+       int                     res;
+
+       sf = &root_dino->di_u.di_dirsf;
+       *ino_dirty = 0;
+       res = 0;
+       irec = NULL;
+       ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT);
+       max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT);
+       use_rbuf = 0;
+       root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+       root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+       /*
+        * run through entries looking for "lost+found".
+        */
+       sf_entry = next_sfe = &sf->list[0];
+       for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size >
+                       (__psint_t)next_sfe - (__psint_t)sf; i++)  {
+               tmp_sfe = NULL;
+               sf_entry = next_sfe;
+               XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+               /* make a NUL-terminated copy of the name for strcmp */
+               bcopy(sf_entry->name, fname, sf_entry->namelen);
+               fname[sf_entry->namelen] = '\0';
+
+               if (!strcmp(ORPHANAGE, fname))  {
+                       agno = XFS_INO_TO_AGNO(mp, lino);
+                       agino = XFS_INO_TO_AGINO(mp, lino);
+
+                       irec = find_inode_rec(agno, agino);
+
+                       /*
+                        * if the orphanage inode is in the tree,
+                        * get it, clear it, and mark it free.
+                        * the inodes in the orphanage will get
+                        * reattached to the new orphanage.
+                        */
+                       if (irec != NULL) {
+                               do_warn("        - clearing existing \"%s\" inode\n",
+                                       ORPHANAGE);
+
+                               ino_offset = agino - irec->ino_startnum;
+
+                               /*
+                                * check if we have to use the root inode
+                                * buffer or read one in ourselves.  Note
+                                * that the root inode is always the first
+                                * inode of the chunk that it's in so there
+                                * are two possible cases where lost+found
+                                * might be in the same buffer as the root
+                                * inode.  One case is a large block
+                                * filesystem where the two inodes are
+                                * in different inode chunks but wind
+                                * up in the same block (multiple chunks
+                                * per block) and the second case (one or
+                                * more blocks per chunk) is where the two
+                                * inodes are in the same chunk. Note that
+                                * inodes are allocated on disk in units
+                                * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+                                */
+                               if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+                                               == XFS_INO_TO_FSB(mp, lino) ||
+                                   (agno == root_agno &&
+                                    agino < root_agino + XFS_INODES_PER_CHUNK)) {
+                                       use_rbuf = 1;
+                                       bp = rootino_bp;
+
+                                       dino = XFS_MAKE_IPTR(mp, bp, agino -
+                                               XFS_INO_TO_AGINO(mp,
+                                                       mp->m_sb.sb_rootino));
+                               } else {
+                                       /*
+                                        * this inode gets its own buffer.
+                                        * reset the flag so that an earlier
+                                        * matching entry that lived in the
+                                        * root inode buffer cannot make us
+                                        * skip the write/release of this
+                                        * buffer below.
+                                        */
+                                       use_rbuf = 0;
+                                       len = (int)XFS_FSB_TO_BB(mp,
+                                               MAX(1, XFS_INODES_PER_CHUNK/
+                                                       inodes_per_block));
+                                       bp = libxfs_readbuf(mp->m_dev,
+                                               XFS_AGB_TO_DADDR(mp, agno,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                               irec->ino_startnum)),
+                                               len, 0);
+                                       if (!bp)
+                                               do_error("could not read %s inode "
+                                                       "%llu\n", ORPHANAGE, lino);
+                                       /*
+                                        * get the agbno containing the first
+                                        * inode in the chunk.  In multi-block
+                                        * chunks, this gets us the offset
+                                        * relative to the beginning of a
+                                        * properly aligned buffer.  In
+                                        * multi-chunk blocks, this gets us
+                                        * the correct block number.  Then
+                                        * turn the block number back into
+                                        * an agino and calculate the offset
+                                        * from there to feed to make the iptr.
+                                        * the last term in effect rounds down
+                                        * to the first agino in the buffer.
+                                        */
+                                       dino = XFS_MAKE_IPTR(mp, bp,
+                                               agino - XFS_OFFBNO_TO_AGINO(mp,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                       irec->ino_startnum),
+                                                       0));
+                               }
+
+                               dirty = clear_dinode(mp, dino, lino);
+
+                               ASSERT(dirty == 0 || dirty && !no_modify);
+
+                               /*
+                                * if we read the lost+found inode into
+                                * a buffer of our own, get rid of that
+                                * buffer here.  if the lost+found
+                                * inode is in the root inode buffer, the
+                                * buffer will be marked dirty anyway since
+                                * the lost+found entry in the root inode is
+                                * also being deleted which makes the root
+                                * inode buffer automatically dirty.
+                                */
+                               if (!use_rbuf)  {
+                                       dino = NULL;
+                                       if (dirty && !no_modify)
+                                               libxfs_writebuf(bp, 0);
+                                       else
+                                               libxfs_putbuf(bp);
+                               }
+
+                               if (inode_isadir(irec, ino_offset))
+                                       clear_inode_isadir(irec, ino_offset);
+
+                               set_inode_free(irec, ino_offset);
+                       }
+
+                       do_warn("        - deleting existing \"%s\" entry\n",
+                               ORPHANAGE);
+
+                       /*
+                        * note -- exactly the same deletion code as in
+                        * process_shortform_dir()
+                        */
+                       tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+                       INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+
+                       /* everything after the entry, up to max_size */
+                       tmp_sfe = (xfs_dir_sf_entry_t *)
+                               ((__psint_t) sf_entry + tmp_elen);
+                       tmp_len = max_size - ((__psint_t) tmp_sfe
+                                       - (__psint_t) sf);
+
+                       memmove(sf_entry, tmp_sfe, tmp_len);
+
+                       INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+
+                       /* zero the bytes vacated at the end by the move */
+                       bzero((void *) ((__psint_t) sf_entry + tmp_len),
+                               tmp_elen);
+
+                       /*
+                        * set the tmp value to the current
+                        * pointer so we'll process the entry
+                        * we just moved up
+                        */
+                       tmp_sfe = sf_entry;
+
+                       /*
+                        * WARNING:  drop the index i by one
+                        * so it matches the decremented count for
+                        * accurate comparisons in the loop test.
+                        * mark root inode as dirty to make deletion
+                        * permanent.
+                        */
+                       i--;
+
+                       *ino_dirty = 1;
+                       res++;
+
+               }
+               next_sfe = (tmp_sfe == NULL)
+                       ? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry +
+                               XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry))
+                       : tmp_sfe;
+       }
+
+       return(res);
+}
+
+/*
+ * Scan one version 2 directory data block (xfs_dir2_data_t) for entries
+ * named ORPHANAGE.
+ *
+ * The scan runs from data->u up to the block's leaf array when the
+ * header carries XFS_DIR2_BLOCK_MAGIC, otherwise up to m_dirblksize
+ * bytes from the start of the block.  Unused regions (free tag set) are
+ * skipped by their recorded length; an unused region whose length is
+ * zero, misaligned, or runs past the end stops the scan.
+ *
+ * For each match the inode number is recorded in old_orphanage_ino.  If
+ * the inode is found in the incore inode tree, the on-disk inode is
+ * cleared and the inode is marked free incore (and no longer a
+ * directory).  The first byte of the entry's name is then overwritten
+ * with '/' to mark the entry as junk.
+ *
+ * mp          - mount structure
+ * ino         - directory inode number (unused)
+ * data        - directory data block to scan, modified in place
+ * dirty       - set to 1 if the data block was modified, else 0
+ * rootino_bp  - buffer holding the root inode, used instead of reading
+ *               a new buffer when the orphanage inode lives in it
+ * rbuf_dirty  - set to 1 if an inode in rootino_bp was modified; it is
+ *               never cleared here
+ *
+ * Returns the number of entries marked for deletion.
+ */
+/* ARGSUSED */
+int
+lf2_block_delete_orphanage(xfs_mount_t         *mp,
+                       xfs_ino_t               ino,
+                       xfs_dir2_data_t         *data,
+                       int                     *dirty,
+                       xfs_buf_t               *rootino_bp,
+                       int                     *rbuf_dirty)
+{
+       xfs_dinode_t            *dino;
+       xfs_buf_t               *bp;
+       ino_tree_node_t         *irec;
+       xfs_ino_t               lino;
+       xfs_agino_t             agino;
+       xfs_agnumber_t          agno;
+       xfs_agino_t             root_agino;
+       xfs_agnumber_t          root_agno;
+       int                     ino_offset;
+       int                     ino_dirty;
+       int                     use_rbuf;
+       int                     len;
+       char                    fname[MAXNAMELEN + 1];
+       int                     res;
+       char                    *ptr;
+       char                    *endptr;
+       xfs_dir2_block_tail_t   *btp;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_unused_t  *dup;
+
+       ptr = (char *)data->u;
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data);
+               endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       } else
+               endptr = (char *)data + mp->m_dirblksize;
+       *dirty = 0;
+       use_rbuf = 0;
+       res = 0;
+       root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+       root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       /* stop scanning on a malformed unused region */
+                       if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr ||
+                               INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+                               (INT_GET(dup->length, ARCH_CONVERT) &
+                                               (XFS_DIR2_DATA_ALIGN - 1)))
+                               break;
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               lino = INT_GET(dep->inumber, ARCH_CONVERT);
+               /* make a NUL-terminated copy of the name for strcmp */
+               bcopy(dep->name, fname, dep->namelen);
+               fname[dep->namelen] = '\0';
+
+               if (fname[0] != '/' && !strcmp(fname, ORPHANAGE))  {
+                       agino = XFS_INO_TO_AGINO(mp, lino);
+                       agno = XFS_INO_TO_AGNO(mp, lino);
+
+                       old_orphanage_ino = lino;
+
+                       irec = find_inode_rec(agno, agino);
+
+                       /*
+                        * if the orphanage inode is in the tree,
+                        * get it, clear it, and mark it free.
+                        * the inodes in the orphanage will get
+                        * reattached to the new orphanage.
+                        */
+                       if (irec != NULL)  {
+                               ino_offset = agino - irec->ino_startnum;
+
+                               /*
+                                * check if we have to use the root inode
+                                * buffer or read one in ourselves.  Note
+                                * that the root inode is always the first
+                                * inode of the chunk that it's in so there
+                                * are two possible cases where lost+found
+                                * might be in the same buffer as the root
+                                * inode.  One case is a large block
+                                * filesystem where the two inodes are
+                                * in different inode chunks but wind
+                                * up in the same block (multiple chunks
+                                * per block) and the second case (one or
+                                * more blocks per chunk) is where the two
+                                * inodes are in the same chunk. Note that
+                                * inodes are allocated on disk in units
+                                * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+                                */
+                               if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+                                               == XFS_INO_TO_FSB(mp, lino) ||
+                                   (agno == root_agno &&
+                                    agino < root_agino + XFS_INODES_PER_CHUNK)) {
+                                       use_rbuf = 1;
+                                       bp = rootino_bp;
+                                       dino = XFS_MAKE_IPTR(mp, bp, agino -
+                                               XFS_INO_TO_AGINO(mp,
+                                                       mp->m_sb.sb_rootino));
+                               } else  {
+                                       /*
+                                        * this inode gets its own buffer.
+                                        * reset the flag so that an earlier
+                                        * matching entry that lived in the
+                                        * root inode buffer cannot make us
+                                        * skip the write/release of this
+                                        * buffer below.
+                                        */
+                                       use_rbuf = 0;
+                                       len = (int)XFS_FSB_TO_BB(mp,
+                                               MAX(1, XFS_INODES_PER_CHUNK/
+                                                       inodes_per_block));
+                                       bp = libxfs_readbuf(mp->m_dev,
+                                               XFS_AGB_TO_DADDR(mp, agno,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                               irec->ino_startnum)),
+                                               len, 0);
+                                       if (!bp)
+                                               do_error("couldn't read %s inode %llu\n",
+                                                       ORPHANAGE, lino);
+
+                                       /*
+                                        * get the agbno containing the first
+                                        * inode in the chunk.  In multi-block
+                                        * chunks, this gets us the offset
+                                        * relative to the beginning of a
+                                        * properly aligned buffer.  In
+                                        * multi-chunk blocks, this gets us
+                                        * the correct block number.  Then
+                                        * turn the block number back into
+                                        * an agino and calculate the offset
+                                        * from there to feed to make the iptr.
+                                        * the last term in effect rounds down
+                                        * to the first agino in the buffer.
+                                        */
+                                       dino = XFS_MAKE_IPTR(mp, bp,
+                                               agino - XFS_OFFBNO_TO_AGINO(mp,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                       irec->ino_startnum),
+                                                       0));
+                               }
+
+                               do_warn("        - clearing existing \"%s\" inode\n",
+                                       ORPHANAGE);
+
+                               ino_dirty = clear_dinode(mp, dino, lino);
+
+                               /*
+                                * a buffer we read ourselves is written
+                                * or released right here; for the root
+                                * inode buffer we only report dirtiness
+                                * back through rbuf_dirty.
+                                */
+                               if (!use_rbuf) {
+                                       ASSERT(ino_dirty == 0 ||
+                                               ino_dirty && !no_modify);
+
+                                       if (ino_dirty && !no_modify)
+                                               libxfs_writebuf(bp, 0);
+                                       else
+                                               libxfs_putbuf(bp);
+                               } else {
+                                       if (ino_dirty)
+                                               *rbuf_dirty = 1;
+                               }
+
+                               if (inode_isadir(irec, ino_offset))
+                                       clear_inode_isadir(irec, ino_offset);
+
+                               set_inode_free(irec, ino_offset);
+
+                       }
+
+                       /*
+                        * regardless of whether the inode num is good or
+                        * bad, mark the entry to be junked so the
+                        * createname in phase 6 will succeed.
+                        */
+                       dep->name[0] = '/';
+                       *dirty = 1;
+                       do_warn(
+                       "        - marking entry \"%s\" to be deleted\n",
+                                               fname);
+                       res++;
+               }
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+       }
+
+       return(res);
+}
+
+/*
+ * Scan every directory block of a version 2 long-form directory and let
+ * lf2_block_delete_orphanage() junk each ORPHANAGE entry it finds.
+ *
+ *     mp         - mount structure of the filesystem being repaired
+ *     ino        - inode number of the directory (used for block mapping
+ *                  and in messages)
+ *     dino       - on-disk inode of that directory
+ *     rootino_bp - buffer holding the root inode, passed through to
+ *                  lf2_block_delete_orphanage()
+ *     rbuf_dirty - cleared here, then handed to
+ *                  lf2_block_delete_orphanage() which may set it
+ *
+ * Returns the sum of the per-block results.  Directory blocks that have
+ * no mapping for one of their filesystem blocks are skipped.
+ */
+int
+longform2_delete_orphanage(xfs_mount_t *mp,
+                       xfs_ino_t       ino,
+                       xfs_dinode_t    *dino,
+                       xfs_buf_t       *rootino_bp,
+                       int             *rbuf_dirty)
+{
+       xfs_dir2_data_t         *data;
+       xfs_dabuf_t             *bp;
+       xfs_dfsbno_t            fsbno;
+       xfs_dablk_t             da_bno;
+       int                     dirty;
+       int                     res;
+       bmap_ext_t              *bmp;
+       int                     i;
+
+       da_bno = 0;
+       *rbuf_dirty = 0;
+       fsbno = NULLDFSBNO;
+       bmp = malloc(mp->m_dirblkfsbs * sizeof(*bmp));
+       if (!bmp) {
+               /*
+                * sizeof yields a size_t; cast so the argument matches
+                * the %u conversion in the message.
+                */
+               do_error(
+       "malloc failed (%u bytes) in longform2_delete_orphanage, ino %llu\n",
+                       (unsigned int)(mp->m_dirblkfsbs * sizeof(*bmp)), ino);
+               exit(1);
+       }
+
+       /*
+        * cycle through the entire directory looking to delete
+        * every "lost+found" entry.  make sure to catch duplicate
+        * entries.
+        *
+        * We could probably speed this up by doing a smarter lookup
+        * to get us to the first block that contains the hashvalue
+        * of "lost+found" but what the heck.  that would require a
+        * double lookup for each level.  and how big can '/' get???
+        * It's probably not worth it.
+        */
+       res = 0;
+
+       for (da_bno = 0;
+            da_bno < XFS_B_TO_FSB(mp, INT_GET(dino->di_core.di_size, ARCH_CONVERT));
+            da_bno += mp->m_dirblkfsbs) {
+               /*
+                * build a one-block-per-entry map covering this directory
+                * block; give up on the block at the first hole.
+                */
+               for (i = 0; i < mp->m_dirblkfsbs; i++) {
+                       fsbno = get_bmapi(mp, dino, ino, da_bno + i,
+                                         XFS_DATA_FORK);
+                       if (fsbno == NULLDFSBNO)
+                               break;
+                       bmp[i].startoff = da_bno + i;
+                       bmp[i].startblock = fsbno;
+                       bmp[i].blockcount = 1;
+                       bmp[i].flag = 0;
+               }
+               if (fsbno == NULLDFSBNO)
+                       continue;
+               bp = da_read_buf(mp, mp->m_dirblkfsbs, bmp);
+               if (bp == NULL) {
+                       do_error(
+               "can't read block %u (fsbno %llu) for directory inode %llu\n",
+                                       da_bno, bmp[0].startblock, ino);
+                       exit(1);
+               }
+
+               data = (xfs_dir2_data_t *)bp->data;
+
+               if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC &&
+                   INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC)  {
+                       do_error(
+       "bad magic # (0x%x) for directory data block (bno %u fsbno %llu)\n",
+                               INT_GET(data->hdr.magic, ARCH_CONVERT), da_bno, bmp[0].startblock);
+                       exit(1);
+               }
+
+               /*
+                * start from a known value rather than relying on the
+                * callee to initialise the flag that is tested below.
+                */
+               dirty = 0;
+               res += lf2_block_delete_orphanage(mp, ino, data, &dirty,
+                                       rootino_bp, rbuf_dirty);
+
+               ASSERT(dirty == 0 || dirty && !no_modify);
+
+               /* write the block back only if it was actually changed */
+               if (dirty && !no_modify)
+                       da_bwrite(mp, bp);
+               else
+                       da_brelse(bp);
+       }
+       free(bmp);
+
+       return(res);
+}
+
+/*
+ * Remove every ORPHANAGE entry from a version 2 short-form directory held
+ * in the inode's data fork, clearing and freeing the inode each entry
+ * points at when that inode has an incore record.
+ *
+ *     mp         - mount structure of the filesystem being repaired
+ *     ino        - inode number of the directory (not used here)
+ *     root_dino  - on-disk inode whose short-form directory is edited
+ *     rootino_bp - buffer holding the root inode; reused when the
+ *                  ORPHANAGE inode lives in the same buffer
+ *     ino_dirty  - set to 1 if the directory was modified, else 0
+ *
+ * Returns the number of entries removed, 0 if none.  delete_orphanage()
+ * asserts that this is 0 or 1.
+ */
+/* ARGSUSED */
+int
+shortform2_delete_orphanage(xfs_mount_t        *mp,
+                       xfs_ino_t       ino,
+                       xfs_dinode_t    *root_dino,
+                       xfs_buf_t       *rootino_bp,
+                       int             *ino_dirty)
+{
+       xfs_dir2_sf_t           *sf;
+       xfs_dinode_t            *dino;
+       xfs_dir2_sf_entry_t     *sf_entry, *next_sfe, *tmp_sfe;
+       xfs_buf_t               *bp;
+       xfs_ino_t               lino;
+       xfs_agino_t             agino;
+       xfs_agino_t             root_agino;
+       int                     max_size;
+       xfs_agnumber_t          agno;
+       xfs_agnumber_t          root_agno;
+       int                     ino_dir_size;
+       ino_tree_node_t         *irec;
+       int                     ino_offset;
+       int                     i;
+       int                     dirty;
+       int                     tmp_len;
+       int                     tmp_elen;
+       int                     len;
+       int                     use_rbuf;
+       char                    fname[MAXNAMELEN + 1];
+       int                     res;
+
+       sf = &root_dino->di_u.di_dir2sf;
+       *ino_dirty = 0;
+       irec = NULL;
+       ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT);
+       max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT);
+       use_rbuf = 0;
+       res = 0;
+       root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino);
+       root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino);
+
+       /*
+        * run through entries looking for "lost+found".
+        */
+       sf_entry = next_sfe = XFS_DIR2_SF_FIRSTENTRY(sf);
+       for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size >
+                       (__psint_t)next_sfe - (__psint_t)sf; i++)  {
+               tmp_sfe = NULL;
+               sf_entry = next_sfe;
+               lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf,
+                       XFS_DIR2_SF_INUMBERP(sf_entry), ARCH_CONVERT);
+               bcopy(sf_entry->name, fname, sf_entry->namelen);
+               fname[sf_entry->namelen] = '\0';
+
+               if (!strcmp(ORPHANAGE, fname))  {
+                       agno = XFS_INO_TO_AGNO(mp, lino);
+                       agino = XFS_INO_TO_AGINO(mp, lino);
+
+                       irec = find_inode_rec(agno, agino);
+
+                       /*
+                        * if the orphanage inode is in the tree,
+                        * get it, clear it, and mark it free.
+                        * the inodes in the orphanage will get
+                        * reattached to the new orphanage.
+                        */
+                       if (irec != NULL)  {
+                               do_warn("        - clearing existing \"%s\" inode\n",
+                                       ORPHANAGE);
+
+                               ino_offset = agino - irec->ino_startnum;
+
+                               /*
+                                * check if we have to use the root inode
+                                * buffer or read one in ourselves.  Note
+                                * that the root inode is always the first
+                                * inode of the chunk that it's in so there
+                                * are two possible cases where lost+found
+                                * might be in the same buffer as the root
+                                * inode.  One case is a large block
+                                * filesystem where the two inodes are
+                                * in different inode chunks but wind
+                                * up in the same block (multiple chunks
+                                * per block) and the second case (one or
+                                * more blocks per chunk) is where the two
+                                * inodes are in the same chunk. Note that
+                                * inodes are allocated on disk in units
+                                * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock).
+                                */
+                               if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino)
+                                               == XFS_INO_TO_FSB(mp, lino) ||
+                                   (agno == root_agno &&
+                                    agino < root_agino + XFS_INODES_PER_CHUNK)) {
+                                       use_rbuf = 1;
+                                       bp = rootino_bp;
+
+                                       dino = XFS_MAKE_IPTR(mp, bp, agino -
+                                               XFS_INO_TO_AGINO(mp,
+                                                       mp->m_sb.sb_rootino));
+                               } else  {
+                                       /*
+                                        * the flag must describe this entry
+                                        * only.  if an earlier entry used the
+                                        * root buffer and the flag stayed set,
+                                        * the buffer read below would never be
+                                        * written back or released.
+                                        */
+                                       use_rbuf = 0;
+                                       len = (int)XFS_FSB_TO_BB(mp,
+                                               MAX(1, XFS_INODES_PER_CHUNK/
+                                                       inodes_per_block));
+                                       bp = libxfs_readbuf(mp->m_dev,
+                                               XFS_AGB_TO_DADDR(mp, agno,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                               irec->ino_startnum)),
+                                               len, 0);
+                                       if (!bp)
+                                               do_error("could not read %s inode "
+                                                       "%llu\n", ORPHANAGE, lino);
+                                       /*
+                                        * get the agbno containing the first
+                                        * inode in the chunk.  In multi-block
+                                        * chunks, this gets us the offset
+                                        * relative to the beginning of a
+                                        * properly aligned buffer.  In
+                                        * multi-chunk blocks, this gets us
+                                        * the correct block number.  Then
+                                        * turn the block number back into
+                                        * an agino and calculate the offset
+                                        * from there to feed to make the iptr.
+                                        * the last term in effect rounds down
+                                        * to the first agino in the buffer.
+                                        */
+                                       dino = XFS_MAKE_IPTR(mp, bp,
+                                               agino - XFS_OFFBNO_TO_AGINO(mp,
+                                                       XFS_AGINO_TO_AGBNO(mp,
+                                                       irec->ino_startnum),
+                                                       0));
+                               }
+
+                               dirty = clear_dinode(mp, dino, lino);
+
+                               ASSERT(dirty == 0 || dirty && !no_modify);
+
+                               /*
+                                * if we read the lost+found inode in to
+                                * it, get rid of it here.  if the lost+found
+                                * inode is in the root inode buffer, the
+                                * buffer will be marked dirty anyway since
+                                * the lost+found entry in the root inode is
+                                * also being deleted which makes the root
+                                * inode buffer automatically dirty.
+                                */
+                               if (!use_rbuf)  {
+                                       dino = NULL;
+                                       if (dirty && !no_modify)
+                                               libxfs_writebuf(bp, 0);
+                                       else
+                                               libxfs_putbuf(bp);
+                               }
+
+                               if (inode_isadir(irec, ino_offset))
+                                       clear_inode_isadir(irec, ino_offset);
+
+                               set_inode_free(irec, ino_offset);
+                       }
+
+                       do_warn("        - deleting existing \"%s\" entry\n",
+                               ORPHANAGE);
+
+                       /*
+                        * note -- exactly the same deletion code as in
+                        * process_shortform_dir()
+                        */
+                       tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry);
+                       INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+
+                       /*
+                        * slide everything after the doomed entry (up to
+                        * the end of the data fork) down over it, then
+                        * zero the bytes vacated at the end.
+                        */
+                       tmp_sfe = (xfs_dir2_sf_entry_t *)
+                               ((__psint_t) sf_entry + tmp_elen);
+                       tmp_len = max_size - ((__psint_t) tmp_sfe
+                                       - (__psint_t) sf);
+
+                       memmove(sf_entry, tmp_sfe, tmp_len);
+
+                       INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+                       if (lino > XFS_DIR2_MAX_SHORT_INUM)
+                               sf->hdr.i8count--;
+
+                       bzero((void *) ((__psint_t) sf_entry + tmp_len),
+                               tmp_elen);
+
+                       /*
+                        * set the tmp value to the current
+                        * pointer so we'll process the entry
+                        * we just moved up
+                        */
+                       tmp_sfe = sf_entry;
+
+                       /*
+                        * WARNING:  drop the index i by one
+                        * so it matches the decremented count for
+                        * accurate comparisons in the loop test.
+                        * mark root inode as dirty to make deletion
+                        * permanent.
+                        */
+                       i--;
+
+                       *ino_dirty = 1;
+
+                       res++;
+               }
+               next_sfe = (tmp_sfe == NULL)
+                       ? (xfs_dir2_sf_entry_t *) ((__psint_t) sf_entry +
+                               XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry))
+                       : tmp_sfe;
+       }
+
+       return(res);
+}
+
+/*
+ * Remove any existing ORPHANAGE entry from the root directory.
+ *
+ * Reads the buffer containing the root inode, dispatches on the inode's
+ * data fork format and the superblock's directory version to the matching
+ * *_delete_orphanage() routine and, if anything was deleted, drops the
+ * root inode's link count by one.  The buffer is written back when it
+ * was modified and simply released otherwise.
+ *
+ * Must not be called in no-modify mode (asserted).
+ */
+void
+delete_orphanage(xfs_mount_t *mp)
+{
+       xfs_ino_t ino;
+       xfs_dinode_t *dino;
+       xfs_buf_t *dbp;
+       int dirty, res, len;
+
+       ASSERT(!no_modify);
+
+       dbp = NULL;
+       dirty = res = 0;
+       ino = mp->m_sb.sb_rootino;
+
+       /*
+        * we know the root is in use or we wouldn't be here
+        */
+       len = (int)XFS_FSB_TO_BB(mp,
+                       MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+       dbp = libxfs_readbuf(mp->m_dev,
+                       XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)), len, 0);
+       if (!dbp) {
+               /* report the length that was actually requested */
+               do_error("could not read buffer for root inode %llu "
+                       "(daddr %lld, size %d)\n", ino,
+                       XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)),
+                       len);
+       }
+
+       /*
+        * we also know that the root inode is always the first inode
+        * allocated in the system, therefore it'll be at the beginning
+        * of the root inode chunk
+        */
+       dino = XFS_MAKE_IPTR(mp, dbp, 0);
+
+       switch (dino->di_core.di_format)  {
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+                       res = longform2_delete_orphanage(mp, ino, dino, dbp,
+                               &dirty);
+               else
+                       res = longform_delete_orphanage(mp, ino, dino, dbp,
+                               &dirty);
+               break;
+       case XFS_DINODE_FMT_LOCAL:
+               if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+                       res = shortform2_delete_orphanage(mp, ino, dino, dbp,
+                               &dirty);
+               else
+                       res = shortform_delete_orphanage(mp, ino, dino, dbp,
+                               &dirty);
+               ASSERT(res == 0 && dirty == 0 || res == 1 && dirty == 1);
+               break;
+       default:
+               /* any other fork format: nothing to delete */
+               break;
+       }
+
+       if (res)  {
+               /*
+                * an entry went away, so drop the root's link count.
+                * version 1 inodes keep the count in di_onlink and
+                * mirror it into di_nlink.
+                */
+               switch (dino->di_core.di_version)  {
+               case XFS_DINODE_VERSION_1:
+                       INT_MOD(dino->di_core.di_onlink, ARCH_CONVERT, -1);
+                       INT_SET(dino->di_core.di_nlink, ARCH_CONVERT,
+                               INT_GET(dino->di_core.di_onlink, ARCH_CONVERT));
+                       break;
+               case XFS_DINODE_VERSION_2:
+                       INT_MOD(dino->di_core.di_nlink, ARCH_CONVERT, -1);
+                       break;
+               default:
+                       do_error("unknown version #%d in root inode\n",
+                                       dino->di_core.di_version);
+               }
+
+               dirty = 1;
+       }
+
+       if (dirty)
+               libxfs_writebuf(dbp, 0);
+       else
+               libxfs_putbuf(dbp);
+}
+
+/*
+ * null out quota inode fields in sb if they point to non-existent inodes.
+ * this isn't as redundant as it looks since it's possible that the sb field
+ * might be set but the imap and inode(s) agree that the inode is
+ * free in which case they'd never be cleared so the fields wouldn't
+ * be cleared by process_dinode().
+ *
+ * For each of sb_uquotino and sb_pquotino that is set (neither NULLFSINO
+ * nor 0): if the inode has no incore record or the record says it is
+ * free, the field is reset to NULLFSINO and the matching lost_*quotino
+ * global is set to 1; otherwise that global is set to 0.  Fields that are
+ * not set leave their lost_*quotino global untouched.
+ */
+void
+quotino_check(xfs_mount_t *mp)
+{
+       ino_tree_node_t *irec;
+
+       if (mp->m_sb.sb_uquotino != NULLFSINO && mp->m_sb.sb_uquotino != 0)  {
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_uquotino),
+                       XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino));
+
+               /*
+                * the record was looked up by AG-relative inode number, so
+                * the offset into it has to be AG-relative as well; using
+                * the full inode number is only right in AG 0.
+                */
+               if (irec == NULL || is_inode_free(irec,
+                               XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino) -
+                                       irec->ino_startnum))  {
+                       mp->m_sb.sb_uquotino = NULLFSINO;
+                       lost_uquotino = 1;
+               } else
+                       lost_uquotino = 0;
+       }
+
+       if (mp->m_sb.sb_pquotino != NULLFSINO && mp->m_sb.sb_pquotino != 0)  {
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_pquotino),
+                       XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino));
+
+               /* same AG-relative offset rule as for the user quota inode */
+               if (irec == NULL || is_inode_free(irec,
+                               XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino) -
+                                       irec->ino_startnum))  {
+                       mp->m_sb.sb_pquotino = NULLFSINO;
+                       lost_pquotino = 1;
+               } else
+                       lost_pquotino = 0;
+       }
+}
+
+/*
+ * Reconcile the fs_quotas flag with the quota inode pointers in the
+ * superblock.  Sets lost_quotas and clears fs_quotas when quotas were
+ * flagged but neither quota inode pointer is set; sets fs_quotas when
+ * both quota inode numbers pass verify_inum().
+ */
+void
+quota_sb_check(xfs_mount_t *mp)
+{
+       /*
+        * if the sb says we have quotas and we lost both,
+        * signal a superblock downgrade.  that will cause
+        * the quota flags to get zeroed.  (if we only lost
+        * one quota inode, do nothing and complain later.)
+        *
+        * if the sb says we have quotas but we didn't start out
+        * with any quota inodes, signal a superblock downgrade.
+        *
+        * The sb downgrades are so that older systems can mount
+        * the filesystem.
+        *
+        * if the sb says we don't have quotas but it looks like
+        * we do have quota inodes, then signal a superblock upgrade.
+        *
+        * if the sb says we don't have quotas and we have no
+        * quota inodes, then leave well enough alone.
+        */
+
+       if (fs_quotas &&
+           (mp->m_sb.sb_uquotino == NULLFSINO || mp->m_sb.sb_uquotino == 0) &&
+           (mp->m_sb.sb_pquotino == NULLFSINO || mp->m_sb.sb_pquotino == 0))  {
+               lost_quotas = 1;
+               fs_quotas = 0;
+       } else if (!verify_inum(mp, mp->m_sb.sb_uquotino) &&
+                       !verify_inum(mp, mp->m_sb.sb_pquotino))  {
+               /*
+                * check each quota inode once; the second test used to
+                * repeat sb_uquotino and never looked at sb_pquotino.
+                */
+               fs_quotas = 1;
+       }
+}
+
+
+/*
+ * Phase 4 driver.  In order, this routine:
+ *   - flags the root inode as needing to be rebuilt if its incore record
+ *     says it is free or not a directory,
+ *   - removes any existing lost+found (skipped in no-modify mode or when
+ *     the root inode itself is lost),
+ *   - walks the per-AG and realtime block state maps and records every
+ *     run of multiply-claimed (XR_E_MULT) blocks as a duplicate extent,
+ *   - resets the block maps,
+ *   - reprocesses the inodes of every AG via process_aginodes(), and
+ *   - checks the quota inode pointers in the superblock.
+ */
+void
+phase4(xfs_mount_t *mp)
+{
+       ino_tree_node_t         *irec;
+       xfs_drtbno_t            bno;
+       xfs_drtbno_t            rt_start;
+       xfs_extlen_t            rt_len;
+       xfs_agnumber_t          i;
+       xfs_agblock_t           j;
+       xfs_agblock_t           ag_end;
+       xfs_agblock_t           extent_start;
+       xfs_extlen_t            extent_len;
+       int                     ag_hdr_len = 4 * mp->m_sb.sb_sectsize;
+       int                     ag_hdr_block;
+       int                     bstate;
+       /* local prototypes; neither function is called in this routine */
+       int                     count_bcnt_extents(xfs_agnumber_t agno);
+       int                     count_bno_extents(xfs_agnumber_t agno);
+       
+       /* number of filesystem blocks covered by the four AG header sectors */
+       ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize);
+
+       printf("Phase 4 - check for duplicate blocks...\n");
+       printf("        - setting up duplicate extent list...\n");
+
+       irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+                               XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+       /*
+        * we always have a root inode, even if it's free...
+        * if the root is free, forget it, lost+found is already gone
+        *
+        * NOTE(review): irec is used without a NULL check, relying on the
+        * statement above -- confirm a record always exists here.
+        */
+       if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
+               need_root_inode = 1;
+               if (no_modify)
+                       do_warn("root inode would be lost\n");
+               else
+                       do_warn("root inode lost\n");
+       }
+
+       /*
+        * have to delete lost+found first so that blocks used
+        * by lost+found don't show up as used
+        */
+       if (!no_modify)  {
+               printf("        - clear lost+found (if it exists) ...\n");
+               if (!need_root_inode)
+                       delete_orphanage(mp);
+       }
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /* the last AG may be shorter than sb_agblocks */
+               ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
+                       mp->m_sb.sb_dblocks -
+                               (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+               /* extent_start == 0 means "no run of duplicates is open" */
+               extent_start = extent_len = 0;
+               /*
+                * set up duplicate extent list for this ag
+                */
+               for (j = ag_hdr_block; j < ag_end; j++)  {
+
+                       bstate = get_agbno_state(mp, i, j);
+
+                       switch (bstate)  {
+                       case XR_E_BAD_STATE:
+                       default:
+                               do_warn("unknown block state, ag %d, \
+block %d\n",
+                                       i, j);
+                               /* fall through .. */
+                       case XR_E_UNKNOWN:
+                       case XR_E_FREE1:
+                       case XR_E_FREE:
+                       case XR_E_INUSE:
+                       case XR_E_INUSE_FS:
+                       case XR_E_INO:
+                       case XR_E_FS_MAP:
+                               /* any non-duplicate block closes an open run */
+                               if (extent_start == 0)
+                                       continue;
+                               else  {
+                                       /*
+                                        * add extent and reset extent state
+                                        */
+                                       add_dup_extent(i, extent_start,
+                                                       extent_len);
+                                       extent_start = 0;
+                                       extent_len = 0;
+                               }
+                               break;
+                       case XR_E_MULT:
+                               /*
+                                * start a run, extend it, or split it once
+                                * it has reached MAXEXTLEN blocks
+                                */
+                               if (extent_start == 0)  {
+                                       extent_start = j;
+                                       extent_len = 1;
+                               } else if (extent_len == MAXEXTLEN)  {
+                                       add_dup_extent(i, extent_start,
+                                                       extent_len);
+                                       extent_start = j;
+                                       extent_len = 1;
+                               } else
+                                       extent_len++;
+                               break;
+                       }
+               }
+               /*
+                * catch tail-case, extent hitting the end of the ag
+                */
+               if (extent_start != 0)
+                       add_dup_extent(i, extent_start, extent_len);
+       }
+
+       /*
+        * build the duplicate extent list for the realtime device
+        *
+        * NOTE(review): rt_start == 0 doubles as "no run open" while bno
+        * starts at 0, so a multiply-claimed rt extent 0 would be left out
+        * of the list -- confirm whether that can occur.
+        */
+       rt_start = 0;
+       rt_len = 0;
+
+       for (bno = 0; bno < mp->m_sb.sb_rextents; bno++)  {
+
+               bstate = get_rtbno_state(mp, bno);
+
+               switch (bstate)  {
+               case XR_E_BAD_STATE:
+               default:
+                       do_warn("unknown rt extent state, extent %llu\n", bno);
+                       /* fall through .. */
+               case XR_E_UNKNOWN:
+               case XR_E_FREE1:
+               case XR_E_FREE:
+               case XR_E_INUSE:
+               case XR_E_INUSE_FS:
+               case XR_E_INO:
+               case XR_E_FS_MAP:
+                       if (rt_start == 0)
+                               continue;
+                       else  {
+                               /*
+                                * add extent and reset extent state
+                                */
+                               add_rt_dup_extent(rt_start, rt_len);
+                               rt_start = 0;
+                               rt_len = 0;
+                       }
+                       break;
+               case XR_E_MULT:
+                       if (rt_start == 0)  {
+                               rt_start = bno;
+                               rt_len = 1;
+                       } else if (rt_len == MAXEXTLEN)  {
+                               /*
+                                * large extent case
+                                */
+                               add_rt_dup_extent(rt_start, rt_len);
+                               rt_start = bno;
+                               rt_len = 1;
+                       } else
+                               rt_len++;
+                       break;
+               }
+       }
+
+       /*
+        * catch tail-case, extent hitting the end of the realtime extents
+        */
+       if (rt_start != 0)
+               add_rt_dup_extent(rt_start, rt_len);
+
+       /*
+        * initialize bitmaps for all AGs
+        */
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /* ag_end is computed here but not used in this loop */
+               ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
+                       mp->m_sb.sb_dblocks -
+                               (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
+               /*
+                * now reset the bitmap for all ags
+                *
+                * NOTE(review): the size multiplies sb_agblocks by
+                * (NBBY/XR_BB); confirm it matches the size ba_bmap[i]
+                * was allocated with.
+                */
+               bzero(ba_bmap[i], roundup(mp->m_sb.sb_agblocks*(NBBY/XR_BB),
+                                               sizeof(__uint64_t)));
+               /* the AG header blocks are always in use by the filesystem */
+               for (j = 0; j < ag_hdr_block; j++)
+                       set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+       }
+       set_bmap_rt(mp->m_sb.sb_rextents);
+       set_bmap_log(mp);
+       set_bmap_fs(mp);
+
+       printf("        - check for inodes claiming duplicate blocks...\n");
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /*
+                * ok, now process the inodes -- signal 2-pass check per inode.
+                * first pass checks if the inode conflicts with a known
+                * duplicate extent.  if so, the inode is cleared and second
+                * pass is skipped.  second pass sets the block bitmap
+                * for all blocks claimed by the inode.  directory
+                * and attribute processing is turned OFF since we did that
+                * already in phase 3.
+                */
+               do_log("        - agno = %d\n", i);
+               process_aginodes(mp, i, 0, 1, 0);
+
+               /*
+                * now recycle the per-AG duplicate extent records
+                */
+               release_dup_extent_tree(i);
+       }
+
+       /*
+        * free up memory used to track realtime duplicate extents
+        *
+        * NOTE(review): rt_start is reset to 0 whenever a run is closed in
+        * the scan above, so it is non-zero here only when the last run
+        * reached the end of the realtime extents; in every other case the
+        * tree is not freed -- confirm this is intended.
+        */
+       if (rt_start != 0)
+               free_rt_dup_extent_tree(mp);
+
+       /*
+        * ensure consistency of quota inode pointers in superblock,
+        * make sure they point to real inodes
+        */
+       quotino_check(mp);
+       quota_sb_check(mp);
+}
diff --git a/repair/phase5.c b/repair/phase5.c
new file mode 100644 (file)
index 0000000..2e306bd
--- /dev/null
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "rt.h"
+#include "versions.h"
+
+/*
+ * we maintain the current slice (path from root to leaf)
+ * of the btree incore.  when we need a new block, we ask
+ * the block allocator for the address of a block on that
+ * level, map the block in, and set up the appropriate
+ * pointers (child, sibling, etc.) and keys that should
+ * point to the new block.
+ *
+ * one bt_stat_level describes a single level of that slice.
+ */
+typedef struct bt_stat_level  {
+       /*
+        * block-tracking state: initialized when a tree build starts
+        * (see build_freespace_tree) and advanced by the tree-building
+        * routines each time a block on this level fills up
+        */
+       xfs_buf_t               *buf_p;         /* buffer of block being filled */
+       xfs_buf_t               *prev_buf_p;    /* buffer of previous block, NULL if none */
+       xfs_agblock_t           agbno;          /* current block being filled */
+       xfs_agblock_t           prev_agbno;     /* previous block, NULLAGBLOCK if none */
+       /*
+        * set in calculate/init cursor routines for each btree level
+        */
+       int                     num_recs_tot;   /* # tree recs in level */
+       int                     num_blocks;     /* # tree blocks in level */
+       int                     num_recs_pb;    /* num_recs_tot / num_blocks */
+       int                     modulo;         /* num_recs_tot % num_blocks; counted down as blocks taking one extra rec are started */
+} bt_stat_level_t;
+
+/*
+ * cursor used while rebuilding one btree: overall tree shape,
+ * the list of blocks set aside for the tree, and the per-level
+ * build state.
+ */
+typedef struct bt_status  {
+       int                     init;           /* cursor set up once? */
+       int                     num_levels;     /* # of levels in btree */
+       xfs_extlen_t            num_tot_blocks; /* # blocks alloc'ed for tree */
+       xfs_extlen_t            num_free_blocks;/* # blocks currently unused */
+
+       xfs_agblock_t           root;           /* root block */
+       /*
+        * list of blocks to be used to set up this tree
+        * and pointer to the first unused block on the list.
+        * the list is malloc'ed in setup_cursor, handed out one
+        * block at a time by get_next_blockaddr and freed in
+        * finish_cursor.
+        */
+       xfs_agblock_t           *btree_blocks;          /* block list */
+       xfs_agblock_t           *free_btree_blocks;     /* first unused block */
+       /*
+        * per-level status info, indexed by btree level (0 == leaf)
+        */
+       bt_stat_level_t         level[XFS_BTREE_MAXLEVELS];
+} bt_status_t;
+
+
+/*
+ * build the incore free space extent trees for an ag.
+ *
+ * walks the block state bitmap of ag "agno" and records every
+ * maximal run of free blocks as one extent in both the by-block
+ * (bno) and by-size (bcnt) incore extent trees.
+ *
+ * returns the number of free extents found.
+ */
+int
+mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       int                     in_extent;
+       int                     num_extents;
+       xfs_agblock_t           extent_start;
+       xfs_extlen_t            extent_len;
+       xfs_agblock_t           agbno;
+       xfs_agblock_t           ag_end;
+
+       /*
+        * scan the bitmap for the ag looking for continuous
+        * extents of free blocks.  At this point, we know
+        * that blocks in the bitmap are either set to an
+        * "in use" state or set to unknown (0) since the
+        * bmaps were bzero'ed in phase 4 and only blocks
+        * being used by inodes, inode bmaps, ag headers,
+        * and the files themselves were put into the bitmap.
+        */
+       ASSERT(agno < mp->m_sb.sb_agcount);
+
+       extent_start = extent_len = 0;
+       in_extent = 0;
+       num_extents = 0;
+
+       /*
+        * every ag but the last is sb_agblocks long, the last one
+        * gets whatever is left over.  do the multiplication in
+        * 64 bits, the same way the phase 4 code computes it.
+        */
+       if (agno < mp->m_sb.sb_agcount - 1)
+               ag_end = mp->m_sb.sb_agblocks;
+       else
+               ag_end = mp->m_sb.sb_dblocks -
+                       (xfs_drfsbno_t) mp->m_sb.sb_agblocks *
+                               (mp->m_sb.sb_agcount - 1);
+
+       /*
+        * ok, now find the free extents and count them
+        */
+       for (agbno = 0; agbno < ag_end; agbno++)  {
+               if (get_agbno_state(mp, agno, agbno) < XR_E_INUSE)  {
+                       if (in_extent == 0)  {
+                               /*
+                                * found the start of a free extent
+                                */
+                               in_extent = 1;
+                               num_extents++;
+                               extent_start = agbno;
+                               extent_len = 1;
+                       } else  {
+                               extent_len++;
+                       }
+               } else if (in_extent)  {
+                       /*
+                        * free extent ends here, add extent to the
+                        * 2 incore extent (avl-to-be-B+) trees
+                        */
+                       in_extent = 0;
+#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
+                       fprintf(stderr, "adding extent %u [%u %u]\n",
+                               agno, extent_start, extent_len);
+#endif
+                       add_bno_extent(agno, extent_start, extent_len);
+                       add_bcnt_extent(agno, extent_start, extent_len);
+               }
+       }
+       if (in_extent)  {
+               /*
+                * the ag ended in the middle of a free extent,
+                * that extent ends here
+                */
+#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT)
+               fprintf(stderr, "adding extent %u [%u %u]\n",
+                       agno, extent_start, extent_len);
+#endif
+               add_bno_extent(agno, extent_start, extent_len);
+               add_bcnt_extent(agno, extent_start, extent_len);
+       }
+
+       return(num_extents);
+}
+
+/*
+ * hand out the next unused block from the cursor's block list.
+ * agno and level are not looked at.
+ */
+/* ARGSUSED */
+xfs_agblock_t
+get_next_blockaddr(xfs_agnumber_t agno, int level, bt_status_t *curs)
+{
+       xfs_agblock_t           next_agbno;
+
+       ASSERT(curs->free_btree_blocks < curs->btree_blocks +
+                                               curs->num_tot_blocks);
+       ASSERT(curs->num_free_blocks > 0);
+
+       /*
+        * take the block at the head of the unused part of the
+        * list, then step the head past it
+        */
+       next_agbno = *curs->free_btree_blocks;
+       curs->free_btree_blocks++;
+       curs->num_free_blocks--;
+
+       return(next_agbno);
+}
+
+/*
+ * set up the dynamically allocated block allocation data in the btree
+ * cursor that depends on the info in the static portion of the cursor.
+ * allocates the block list (curs->num_tot_blocks entries) and fills it
+ * with blocks taken from the incore bno/bcnt extent trees, smallest
+ * extent first; the extents used are removed from both trees and any
+ * unused tail of the last extent is put back.  called by
+ * init_freespace_cursor() and init_ino_cursor().
+ *
+ * does not return if the block list can't be allocated or the ag
+ * doesn't have enough free space.  mp is not looked at.
+ */
+/* ARGSUSED */
+void
+setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs)
+{
+       unsigned int            u;
+       xfs_extlen_t            blocks_wanted;
+       xfs_extlen_t            blocks_allocated;
+       xfs_extlen_t            rem_len;
+       xfs_agblock_t           rem_start;
+       extent_tree_node_t      *ext_ptr;
+       extent_tree_node_t      *bno_ext_ptr;
+       xfs_agblock_t           *agb_ptr;
+
+       /*
+        * get the number of blocks we need to allocate, then
+        * set up block number array, set the free block pointer
+        * to the first block in the array, and null the array
+        */
+       blocks_wanted = curs->num_tot_blocks;
+       blocks_allocated = 0;
+
+       ASSERT(blocks_wanted > 0);
+
+       /*
+        * the list holds block numbers, not pointers, so size
+        * the elements accordingly
+        */
+       if ((curs->btree_blocks = malloc(sizeof(xfs_agblock_t)
+                                       * blocks_wanted)) == NULL)  {
+               do_error("could not set up btree block array\n");
+               exit(1);
+       }
+       curs->free_btree_blocks = curs->btree_blocks;
+
+       /*
+        * null out exactly as many entries as were allocated
+        */
+       for (u = 0; u < blocks_wanted; u++)
+               curs->btree_blocks[u] = NULLAGBLOCK;
+
+       /*
+        * grab the smallest extent and use it up, then get the
+        * next smallest.  This mimics the init_*_cursor code.
+        */
+       if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL)  {
+               do_error("error - not enough free space in filesystem\n");
+               exit(1);
+       }
+
+       agb_ptr = curs->btree_blocks;
+
+       /*
+        * set up the free block array
+        */
+       while (blocks_allocated < blocks_wanted)  {
+               /*
+                * use up the extent we've got
+                */
+               for (u = 0; u < ext_ptr->ex_blockcount &&
+                               blocks_allocated < blocks_wanted; u++)  {
+                       ASSERT(agb_ptr < curs->btree_blocks
+                                       + curs->num_tot_blocks);
+                       *agb_ptr++ = ext_ptr->ex_startblock + u;
+                       blocks_allocated++;
+               }
+
+               /*
+                * if we only used part of this last extent, then we
+                * need only to reset the extent in the extent
+                * trees and we're done
+                */
+               if (u < ext_ptr->ex_blockcount)  {
+                       rem_start = ext_ptr->ex_startblock + u;
+                       rem_len = ext_ptr->ex_blockcount - u;
+
+                       ASSERT(rem_len > 0);
+
+                       /*
+                        * print the trace before the node is released,
+                        * the node must not be looked at afterwards
+                        */
+#ifdef XR_BLD_FREE_TRACE
+                       fprintf(stderr, "releasing extent: %u [%u %u]\n",
+                               agno, ext_ptr->ex_startblock,
+                               ext_ptr->ex_blockcount);
+                       fprintf(stderr, "blocks_allocated = %d\n",
+                               blocks_allocated);
+#endif
+                       bno_ext_ptr = find_bno_extent(agno,
+                                               ext_ptr->ex_startblock);
+                       ASSERT(bno_ext_ptr != NULL);
+                       get_bno_extent(agno, bno_ext_ptr);
+                       release_extent_tree_node(bno_ext_ptr);
+
+                       ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+                                       ext_ptr->ex_blockcount);
+                       ASSERT(ext_ptr != NULL);
+                       release_extent_tree_node(ext_ptr);
+
+                       /*
+                        * put the unused tail back into both trees
+                        */
+                       add_bno_extent(agno, rem_start, rem_len);
+                       add_bcnt_extent(agno, rem_start, rem_len);
+
+                       return;
+               }
+               /*
+                * delete the used-up extent from both extent trees and
+                * find the next smallest extent
+                */
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "releasing extent: %u [%u %u]\n",
+                       agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+               bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock);
+               ASSERT(bno_ext_ptr != NULL);
+               get_bno_extent(agno, bno_ext_ptr);
+               release_extent_tree_node(bno_ext_ptr);
+
+               ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock,
+                               ext_ptr->ex_blockcount);
+               ASSERT(ext_ptr != NULL);
+               release_extent_tree_node(ext_ptr);
+
+               ext_ptr = findfirst_bcnt_extent(agno);
+
+               /*
+                * running out of extents before the list is full
+                * would otherwise have us chase a null pointer on
+                * the next trip around the loop
+                */
+               if (ext_ptr == NULL && blocks_allocated < blocks_wanted)  {
+                       do_error(
+                       "error - not enough free space in filesystem\n");
+                       exit(1);
+               }
+       }
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "blocks_allocated = %d\n",
+               blocks_allocated);
+#endif
+}
+
+/*
+ * write out the buffers still held by the cursor: for every level
+ * of the tree, the previous block (if there is one) followed by
+ * the block currently being filled.
+ */
+void
+write_cursor(bt_status_t *curs)
+{
+       bt_stat_level_t         *lptr;
+       int                     level;
+
+       for (level = 0; level < curs->num_levels; level++)  {
+               lptr = &curs->level[level];
+#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE)
+               fprintf(stderr, "writing bt block %u\n", lptr->agbno);
+#endif
+               if (lptr->prev_buf_p != NULL)  {
+                       ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+                       libxfs_writebuf(lptr->prev_buf_p, 0);
+               }
+               libxfs_writebuf(lptr->buf_p, 0);
+       }
+}
+
+/*
+ * release the block list set up by setup_cursor.  every block on
+ * the list has to have been handed out by now.
+ */
+void
+finish_cursor(bt_status_t *curs)
+{
+       ASSERT(curs->num_free_blocks == 0);
+       free(curs->btree_blocks);
+
+       /*
+        * don't leave pointers into the freed list behind in the
+        * cursor, a second call then becomes a harmless free(NULL)
+        */
+       curs->btree_blocks = NULL;
+       curs->free_btree_blocks = NULL;
+}
+
+/*
+ * no-cursor versions of the XFS equivalents.  The address calculators
+ * should be used only for interior btree nodes.
+ * these are adapted from xfs_alloc_btree.h and xfs_tree.h
+ *
+ * bp is the address of the btree block itself (its header), i is the
+ * 1-based index of the key/pointer wanted.  keys start right behind
+ * the block header, pointers start behind room for m_alloc_mxr[1]
+ * keys.  each expansion is fully parenthesized so that the result
+ * can be used as an operand (e.g. MACRO(...)->field) safely.
+ */
+#define XR_ALLOC_KEY_ADDR(mp, bp, i) \
+       ((xfs_alloc_key_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \
+                               + ((i)-1) * sizeof(xfs_alloc_key_t)))
+
+#define XR_ALLOC_PTR_ADDR(mp, bp, i) \
+       ((xfs_alloc_ptr_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \
+                       + (mp)->m_alloc_mxr[1] * sizeof(xfs_alloc_key_t) \
+                       + ((i)-1) * sizeof(xfs_alloc_ptr_t)))
+
+/*
+ * max # of records in a block on the given level (0 == leaf)
+ */
+#define XR_ALLOC_BLOCK_MAXRECS(mp, level) \
+                       (XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \
+                                               xfs_alloc, (level) == 0))
+
+/*
+ * lay out the per-level geometry (blocks per level, records per
+ * block) in btree_curs for a freespace btree holding num_recs leaf
+ * records.  num_recs must not be zero.  # of records per interior
+ * level is the # of blocks in the level below it.
+ * returns the number of levels in the tree.
+ */
+static int
+calc_freespace_levels(xfs_mount_t *mp, bt_status_t *btree_curs,
+                       xfs_agblock_t num_recs)
+{
+       bt_stat_level_t         *lptr;
+       bt_stat_level_t         *p_lptr;
+       int                     level;
+
+       /*
+        * figure out how much space we need for the leaf level
+        * of the tree and set up the cursor for the leaf level
+        */
+       lptr = &btree_curs->level[0];
+       lptr->num_blocks = howmany(num_recs, XR_ALLOC_BLOCK_MAXRECS(mp, 0));
+       lptr->num_recs_pb = num_recs / lptr->num_blocks;
+       lptr->modulo = num_recs % lptr->num_blocks;
+       lptr->num_recs_tot = num_recs;
+
+       /*
+        * if we need more levels, set them up
+        */
+       for (level = 1; btree_curs->level[level - 1].num_blocks > 1
+                       && level < XFS_BTREE_MAXLEVELS; level++)  {
+               lptr = &btree_curs->level[level];
+               p_lptr = &btree_curs->level[level - 1];
+               lptr->num_blocks = howmany(p_lptr->num_blocks,
+                               XR_ALLOC_BLOCK_MAXRECS(mp, level));
+               lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
+               lptr->num_recs_pb = p_lptr->num_blocks / lptr->num_blocks;
+               lptr->num_recs_tot = p_lptr->num_blocks;
+       }
+       ASSERT(lptr->num_blocks == 1);
+
+       return(level);
+}
+
+/*
+ * this calculates a freespace cursor for an ag.
+ * btree_curs is an in/out.  *extents is an in/out as well:
+ * on entry the number of free extents in the ag, on return
+ * the number left once the extents that will be consumed by
+ * the btree blocks themselves are taken out.  returns the
+ * number of blocks that will show up in the AGFL.
+ * does not return if the ag doesn't have enough free space
+ * to hold the trees.
+ */
+
+int
+calculate_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
+                       xfs_agblock_t *extents, bt_status_t *btree_curs)
+{
+       xfs_extlen_t            blocks_needed;          /* a running count */
+       xfs_extlen_t            blocks_allocated_pt;    /* per tree */
+       xfs_extlen_t            blocks_allocated_total; /* for both trees */
+       xfs_agblock_t           num_extents;
+       int                     i;
+       int                     extents_used;
+       int                     extra_blocks;
+       bt_stat_level_t         *lptr;
+       extent_tree_node_t      *ext_ptr;
+       int                     level;
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr,
+               "in init_freespace_cursor, agno = %d\n", agno);
+#endif
+
+       num_extents = *extents;
+       extents_used = 0;
+
+       ASSERT(num_extents != 0);
+
+       btree_curs->init = 1;
+
+       /*
+        * first pass: lay out the tree as if none of the free
+        * extents were needed for the tree blocks themselves
+        */
+       level = calc_freespace_levels(mp, btree_curs, num_extents);
+       btree_curs->num_levels = level;
+
+       /*
+        * ok, now we have a hypothetical cursor that
+        * will work for both the bno and bcnt trees.
+        * now figure out if using up blocks to set up the
+        * trees will perturb the shape of the freespace tree.
+        * if so, we've over-allocated.  the freespace trees
+        * as they will be *after* accounting for the free space
+        * we've used up will need fewer blocks to represent
+        * than we've allocated.  We can use the AGFL to hold
+        * XFS_AGFL_SIZE (128) blocks but that's it.
+        * Thus we limit things to XFS_AGFL_SIZE/2 for each of the 2 btrees.
+        * if the number of extra blocks is more than that,
+        * we'll have to be called again.
+        */
+       for (blocks_needed = 0, i = 0; i < level; i++)  {
+               blocks_needed += btree_curs->level[i].num_blocks;
+       }
+
+       /*
+        * record the # of blocks we've allocated
+        */
+       blocks_allocated_pt = blocks_needed;
+       blocks_needed *= 2;
+       blocks_allocated_total = blocks_needed;
+
+       /*
+        * figure out how many free extents will be used up by
+        * our space allocation
+        */
+       if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL)  {
+               do_error("can't rebuild fs trees -- not enough free space "
+                       "on ag %u\n", agno);
+               exit(1);
+       }
+
+       while (ext_ptr != NULL && blocks_needed > 0)  {
+               if (ext_ptr->ex_blockcount <= blocks_needed)  {
+                       /*
+                        * extent gets eaten whole, it won't need
+                        * a record in the tree anymore
+                        */
+                       blocks_needed -= ext_ptr->ex_blockcount;
+                       extents_used++;
+               } else  {
+                       blocks_needed = 0;
+               }
+
+               ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
+
+#ifdef XR_BLD_FREE_TRACE
+               if (ext_ptr != NULL)  {
+                       fprintf(stderr, "got next extent [%u %u]\n",
+                               ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+               } else  {
+                       fprintf(stderr, "out of extents\n");
+               }
+#endif
+       }
+       if (blocks_needed > 0)  {
+               do_error("ag %u - not enough free space to build freespace "
+                       "btrees\n", agno);
+               exit(1);
+       }
+
+       ASSERT(num_extents >= extents_used);
+
+       num_extents -= extents_used;
+
+       /*
+        * see if the number of leaf blocks will change as a result
+        * of the number of extents changing
+        */
+       if (howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0))
+                       != btree_curs->level[0].num_blocks)  {
+               /*
+                * yes -- recalculate the cursor.  If the number of
+                * excess (overallocated) blocks is < XFS_AGFL_SIZE/2, we're ok.
+                * we can put those into the AGFL.  we don't try
+                * and get things to converge exactly (reach a
+                * state with zero excess blocks) because there
+                * exist pathological cases which will never
+                * converge.  first, check for the zero-case.
+                */
+               if (num_extents == 0)  {
+                       /*
+                        * ok, we've used up all the free blocks
+                        * trying to lay out the leaf level. go
+                        * to a one block (empty) btree and put the
+                        * already allocated blocks into the AGFL
+                        */
+                       if (btree_curs->level[0].num_blocks != 1)  {
+                               /*
+                                * we really needed more blocks because
+                                * the old tree had more than one level.
+                                * this is bad.
+                                */
+                                do_warn("not enough free blocks left to "
+                                       "describe all free blocks in AG %u\n",
+                                       agno);
+                       }
+#ifdef XR_BLD_FREE_TRACE
+                       fprintf(stderr,
+                               "ag %u -- no free extents, alloc'ed %d\n",
+                               agno, blocks_allocated_pt);
+#endif
+                       /*
+                        * the one remaining block is the leaf, so it's
+                        * the leaf level (not whatever level the first
+                        * pass ended on) that has to be reset
+                        */
+                       lptr = &btree_curs->level[0];
+                       lptr->num_blocks = 1;
+                       lptr->modulo = 0;
+                       lptr->num_recs_pb = 0;
+                       lptr->num_recs_tot = 0;
+
+                       btree_curs->num_levels = 1;
+
+                       /*
+                        * don't reset the allocation stats, assume
+                        * they're all extra blocks
+                        * don't forget to return the total block count
+                        * not the per-tree block count.  these are the
+                        * extras that will go into the AGFL.  subtract
+                        * two for the root blocks.
+                        */
+                       btree_curs->num_tot_blocks = blocks_allocated_pt;
+                       btree_curs->num_free_blocks = blocks_allocated_pt;
+
+                       *extents = 0;
+
+                       return(blocks_allocated_total - 2);
+               }
+
+               /*
+                * second pass: lay the tree out again for the
+                * extents that are actually left
+                */
+               level = calc_freespace_levels(mp, btree_curs, num_extents);
+               btree_curs->num_levels = level;
+
+               /*
+                * now figure out the number of excess blocks
+                */
+               for (blocks_needed = 0, i = 0; i < level; i++)  {
+                       blocks_needed += btree_curs->level[i].num_blocks;
+               }
+               blocks_needed *= 2;
+
+               ASSERT(blocks_allocated_total >= blocks_needed);
+               extra_blocks = blocks_allocated_total - blocks_needed;
+       } else  {
+               if (extents_used > 0) {
+                       /*
+                        * reset the leaf level geometry to account
+                        * for consumed extents.  we can leave the
+                        * rest of the cursor alone since the number
+                        * of leaf blocks hasn't changed.
+                        */
+                       lptr = &btree_curs->level[0];
+
+                       lptr->num_recs_pb = num_extents / lptr->num_blocks;
+                       lptr->modulo = num_extents % lptr->num_blocks;
+                       lptr->num_recs_tot = num_extents;
+               }
+
+               extra_blocks = 0;
+       }
+
+       btree_curs->num_tot_blocks = blocks_allocated_pt;
+       btree_curs->num_free_blocks = blocks_allocated_pt;
+
+       *extents = num_extents;
+
+       return(extra_blocks);
+}
+
+/*
+ * add a (key, block pointer) entry for the block currently being
+ * filled on "level" to the interior block one level up.  if that
+ * interior block is already full, a new one is started first and
+ * the entry is propagated further up the tree as well.  does
+ * nothing once the level above "level" is past the top of the tree.
+ */
+void
+prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
+               bt_status_t *btree_curs, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount, int level, __uint32_t magic)
+{
+       xfs_alloc_block_t       *block;
+       xfs_alloc_key_t         *key;
+       xfs_alloc_ptr_t         *ptr;
+       xfs_agblock_t           new_agbno;
+       bt_stat_level_t         *lptr;
+       int                     parent;
+       int                     slot;
+
+       parent = level + 1;
+
+       if (parent >= btree_curs->num_levels)
+               return;
+
+       lptr = &btree_curs->level[parent];
+       block = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 0)  {
+               /*
+                * only happens once when initializing the
+                * left-hand side of the tree.
+                */
+               prop_freespace_cursor(mp, agno, btree_curs, startblock,
+                               blockcount, parent, magic);
+       }
+
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) ==
+                       lptr->num_recs_pb + (lptr->modulo > 0))  {
+               /*
+                * block is full -- write out current prev block,
+                * grab us a new block, and set the rightsib pointer
+                * of current block
+                */
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, " %d ", lptr->prev_agbno);
+#endif
+               if (lptr->prev_agbno != NULLAGBLOCK) {
+                       ASSERT(lptr->prev_buf_p != NULL);
+                       libxfs_writebuf(lptr->prev_buf_p, 0);
+               }
+               lptr->prev_buf_p = lptr->buf_p;
+               lptr->prev_agbno = lptr->agbno;
+
+               new_agbno = get_next_blockaddr(agno, parent, btree_curs);
+
+               INT_SET(block->bb_rightsib, ARCH_CONVERT, new_agbno);
+
+               lptr->agbno = new_agbno;
+               lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, new_agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+
+               /*
+                * one fewer block left that takes an extra record
+                */
+               if (lptr->modulo != 0)
+                       lptr->modulo -= 1;
+
+               /*
+                * initialize block header of the new block
+                */
+               block = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+               bzero(block, mp->m_sb.sb_blocksize);
+
+               INT_SET(block->bb_magic, ARCH_CONVERT, magic);
+               INT_SET(block->bb_level, ARCH_CONVERT, parent);
+               INT_SET(block->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+               INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_ZERO(block->bb_numrecs, ARCH_CONVERT);
+
+               /*
+                * propagate extent record for first extent in new block up
+                */
+               prop_freespace_cursor(mp, agno, btree_curs, startblock,
+                               blockcount, parent, magic);
+       }
+
+       /*
+        * add extent info to current block: bump the record count
+        * and fill in the key and pointer in the slot just claimed
+        */
+       INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+       slot = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+       key = XR_ALLOC_KEY_ADDR(mp, block, slot);
+       ptr = XR_ALLOC_PTR_ADDR(mp, block, slot);
+
+       INT_SET(key->ar_startblock, ARCH_CONVERT, startblock);
+       INT_SET(key->ar_blockcount, ARCH_CONVERT, blockcount);
+       INT_SET(*ptr, ARCH_CONVERT, btree_curs->level[level].agbno);
+}
+
+/*
+ * rebuilds a freespace tree given a cursor and magic number of type
+ * of tree to build (bno or bcnt).  returns the number of free blocks
+ * represented by the tree.
+ */
+xfs_extlen_t
+build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
+               bt_status_t *btree_curs, __uint32_t magic)
+{
+       xfs_agnumber_t          i;
+       xfs_agblock_t           j;
+       xfs_alloc_block_t       *bt_hdr;
+       xfs_alloc_rec_t         *bt_rec;
+       int                     level;
+       xfs_agblock_t           agbno;
+       extent_tree_node_t      *ext_ptr;
+       bt_stat_level_t         *lptr;
+       xfs_extlen_t            freeblks;
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
+#endif
+       level = btree_curs->num_levels;
+       freeblks = 0;
+
+       ASSERT(level > 0);
+
+       /*
+        * initialize the first block on each btree level
+        */
+       for (i = 0; i < level; i++)  {
+               lptr = &btree_curs->level[i];
+
+               agbno = get_next_blockaddr(agno, i, btree_curs);
+               lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+
+               if (i == btree_curs->num_levels - 1)
+                       btree_curs->root = agbno;
+
+               lptr->agbno = agbno;
+               lptr->prev_agbno = NULLAGBLOCK;
+               lptr->prev_buf_p = NULL;
+               /*
+                * initialize block header
+                */
+               bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+               bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+               INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
+               INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i);
+               INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT,
+                               bt_hdr->bb_rightsib = NULLAGBLOCK);
+               INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * run along leaf, setting up records.  as we have to switch
+        * blocks, call the prop_freespace_cursor routine to set up the new
+        * pointers for the parent.  that can recurse up to the root
+        * if required.  set the sibling pointers for leaf level here.
+        */
+       if (magic == XFS_ABTB_MAGIC)
+               ext_ptr = findfirst_bno_extent(agno);
+       else 
+               ext_ptr = findfirst_bcnt_extent(agno);
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
+               agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
+#endif
+
+       lptr = &btree_curs->level[0];
+
+       for (i = 0; i < btree_curs->level[0].num_blocks; i++)  {
+               /*
+                * block initialization, lay in block header
+                */
+               bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
+               bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+               INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
+               INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT);
+               INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+               INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT,
+                               lptr->num_recs_pb + (lptr->modulo > 0));
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "bft, bb_numrecs = %d\n",
+                               INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+#endif
+
+               if (lptr->modulo > 0)
+                       lptr->modulo--;
+
+               /*
+                * initialize values in the path up to the root if
+                * this is a multi-level btree
+                */
+               if (btree_curs->num_levels > 1)
+                       prop_freespace_cursor(mp, agno, btree_curs,
+                                       ext_ptr->ex_startblock,
+                                       ext_ptr->ex_blockcount,
+                                       0, magic);
+
+               bt_rec = (xfs_alloc_rec_t *) ((char *) bt_hdr +
+                                               sizeof(xfs_alloc_block_t));
+               for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) {
+                       ASSERT(ext_ptr != NULL);
+                       INT_SET(bt_rec[j].ar_startblock, ARCH_CONVERT,
+                               ext_ptr->ex_startblock);
+                       INT_SET(bt_rec[j].ar_blockcount, ARCH_CONVERT,
+                               ext_ptr->ex_blockcount);
+                       freeblks += ext_ptr->ex_blockcount;
+                       if (magic == XFS_ABTB_MAGIC)
+                               ext_ptr = findnext_bno_extent(ext_ptr);
+                       else
+                               ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
+#if 0
+#ifdef XR_BLD_FREE_TRACE
+                       if (ext_ptr == NULL)
+                               fprintf(stderr, "null extent pointer, j = %d\n",
+                                       j);
+                       else
+                               fprintf(stderr,
+                               "bft, agno = %d, start = %u, count = %u\n",
+                                       agno, ext_ptr->ex_startblock,
+                                       ext_ptr->ex_blockcount);
+#endif
+#endif
+               }
+
+               if (ext_ptr != NULL)  {
+                       /*
+                        * get next leaf level block
+                        */
+                       if (lptr->prev_buf_p != NULL)  {
+#ifdef XR_BLD_FREE_TRACE
+                               fprintf(stderr, " writing fst agbno %u\n",
+                                       lptr->prev_agbno);
+#endif
+                               ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+                               libxfs_writebuf(lptr->prev_buf_p, 0);
+                       }
+                       lptr->prev_buf_p = lptr->buf_p;
+                       lptr->prev_agbno = lptr->agbno;
+
+                       INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno =
+                               get_next_blockaddr(agno, 0, btree_curs));
+
+                       lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+               }
+       }
+
+       return(freeblks);
+}
+
+/*
+ * no-cursor versions of the XFS equivalents.  The address calculators
+ * should be used only for interior btree nodes: the index i is
+ * 1-based, the key array starts right after the block header, and the
+ * pointer array starts after room for (mp)->m_inobt_mxr[1] keys.
+ * these are adapted from xfs_ialloc_btree.h and xfs_tree.h
+ *
+ * Each expansion is wrapped in its own parentheses so that the macros
+ * can be embedded in a larger expression (for instance followed by
+ * -> or []) without the cast binding to the wrong operand.
+ */
+#define XR_INOBT_KEY_ADDR(mp, bp, i) \
+       ((xfs_inobt_key_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
+                               + ((i)-1) * sizeof(xfs_inobt_key_t)))
+
+#define XR_INOBT_PTR_ADDR(mp, bp, i) \
+       ((xfs_inobt_ptr_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
+                       + (mp)->m_inobt_mxr[1] * sizeof(xfs_inobt_key_t) \
+                       + ((i)-1) * sizeof(xfs_inobt_ptr_t)))
+
+/* max records per block; (level) == 0 is handed on as the leaf flag */
+#define XR_INOBT_BLOCK_MAXRECS(mp, level) \
+                       (XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \
+                                               xfs_inobt, (level) == 0))
+
+/*
+ * Work out the shape of the inode btree for an AG and set up its
+ * cursor: count the incore inode records, derive the number of blocks
+ * and records-per-block for every level, and hand the totals to
+ * setup_cursor().  The inode and free-inode totals for the AG are
+ * returned through num_inos and num_free_inos (both zero when the AG
+ * has no inode records).
+ *
+ * we don't have to worry here about how chewing up free extents
+ * may perturb things because inode tree building happens before
+ * freespace tree building.
+ */
+void
+init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+               __uint64_t *num_inos, __uint64_t *num_free_inos)
+{
+       bt_stat_level_t         *cur = &btree_curs->level[0];
+       bt_stat_level_t         *below;
+       ino_tree_node_t         *chunk;
+       __uint64_t              total_inodes = 0;
+       __uint64_t              free_inodes = 0;
+       xfs_extlen_t            nblocks;
+       int                     nrecs = 0;
+       int                     level;
+       int                     slot;
+
+       *num_inos = 0;
+       *num_free_inos = 0;
+       btree_curs->init = 1;
+
+       chunk = findfirst_inode_rec(agno);
+       if (chunk == NULL)  {
+               /*
+                * no inode records at all -- the tree is a single,
+                * empty leaf block that is also the root
+                */
+               cur->num_recs_tot = 0;
+               cur->num_recs_pb = 0;
+               cur->modulo = 0;
+               cur->num_blocks = 1;
+
+               btree_curs->num_levels = 1;
+               btree_curs->num_free_blocks = 1;
+               btree_curs->num_tot_blocks = btree_curs->num_free_blocks;
+
+               setup_cursor(mp, agno, btree_curs);
+
+               return;
+       }
+
+       /*
+        * walk the incore records, counting records, inodes and
+        * free inodes
+        */
+       while (chunk != NULL)  {
+               nrecs++;
+               total_inodes += XFS_INODES_PER_CHUNK;
+               for (slot = 0; slot < XFS_INODES_PER_CHUNK; slot++)  {
+                       ASSERT(is_inode_confirmed(chunk, slot));
+                       if (is_inode_free(chunk, slot))
+                               free_inodes++;
+               }
+               chunk = next_ino_rec(chunk);
+       }
+
+       /*
+        * leaf level: spread the records evenly over the blocks,
+        * modulo blocks get one extra record each
+        */
+       cur->num_blocks = howmany(nrecs, XR_INOBT_BLOCK_MAXRECS(mp, 0));
+       nblocks = cur->num_blocks;
+       cur->num_recs_tot = nrecs;
+       cur->num_recs_pb = nrecs / cur->num_blocks;
+       cur->modulo = nrecs % cur->num_blocks;
+
+       /*
+        * interior levels: keep adding a level on top until a level
+        * fits in one block (or we run out of levels)
+        */
+       level = 1;
+       while (btree_curs->level[level - 1].num_blocks > 1
+                       && level < XFS_BTREE_MAXLEVELS)  {
+               below = &btree_curs->level[level - 1];
+               cur = &btree_curs->level[level];
+
+               cur->num_blocks = howmany(below->num_blocks,
+                               XR_INOBT_BLOCK_MAXRECS(mp, level));
+               cur->num_recs_tot = below->num_blocks;
+               cur->num_recs_pb = below->num_blocks / cur->num_blocks;
+               cur->modulo = below->num_blocks % cur->num_blocks;
+
+               nblocks += cur->num_blocks;
+               level++;
+       }
+       ASSERT(cur->num_blocks == 1);
+       btree_curs->num_levels = level;
+
+       btree_curs->num_free_blocks = nblocks;
+       btree_curs->num_tot_blocks = btree_curs->num_free_blocks;
+
+       setup_cursor(mp, agno, btree_curs);
+
+       *num_inos = total_inodes;
+       *num_free_inos = free_inodes;
+
+       return;
+}
+
+/*
+ * Add an entry for the block currently being filled at "level" to the
+ * block being filled one level above it.  The entry's key is
+ * "startino" and its pointer is the agbno of the level-"level" block.
+ *
+ * If the parent block already holds its full share of records, the
+ * previously finished block of that level (if any) is written out, a
+ * fresh block is taken from the cursor and linked in as the right
+ * sibling, and the first key of the new block is propagated one level
+ * further up.  A call for a level at or above the top of the tree
+ * returns without doing anything.
+ */
+void
+prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
+       xfs_agino_t startino, int level)
+{
+       xfs_inobt_block_t       *bt_hdr;
+       xfs_inobt_key_t         *bt_key;
+       xfs_inobt_ptr_t         *bt_ptr;
+       xfs_agblock_t           agbno;
+       bt_stat_level_t         *lptr;
+
+       /* from here on "level" is the parent level being updated */
+       level++;
+
+       if (level >= btree_curs->num_levels)
+               return;
+
+       lptr = &btree_curs->level[level];
+       bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+
+       if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == 0)  {
+               /*
+                * this only happens once to initialize the
+                * first path up the left side of the tree
+                * where the agbno's are already set up
+                */
+               prop_ino_cursor(mp, agno, btree_curs, startino, level);
+       }
+
+       /*
+        * block is full when it holds num_recs_pb records, plus one
+        * extra while there is still a remainder (modulo) to hand out
+        */
+       if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) ==
+                               lptr->num_recs_pb + (lptr->modulo > 0))  {
+               /*
+                * write out current prev block, grab us a new block,
+                * and set the rightsib pointer of current block
+                */
+#ifdef XR_BLD_INO_TRACE
+               fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
+#endif
+               if (lptr->prev_agbno != NULLAGBLOCK)  {
+                       ASSERT(lptr->prev_buf_p != NULL);
+                       libxfs_writebuf(lptr->prev_buf_p, 0);
+               }
+               /* the block just filled becomes the "previous" block */
+               lptr->prev_agbno = lptr->agbno;;
+               lptr->prev_buf_p = lptr->buf_p;
+               agbno = get_next_blockaddr(agno, level, btree_curs);
+
+               /* bt_hdr still refers to the full block here */
+               INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, agbno);
+
+               lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+               lptr->agbno = agbno;
+
+               /* one block that carried an extra record is done */
+               if (lptr->modulo)
+                       lptr->modulo--;
+
+               /*
+                * initialize block header
+                */
+               bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+               bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+               INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+               INT_SET(bt_hdr->bb_level, ARCH_CONVERT, level);
+               INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+               INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+               /*
+                * propagate extent record for first extent in new block up
+                */
+               prop_ino_cursor(mp, agno, btree_curs, startino, level);
+       }
+       /*
+        * add inode info to current block
+        */
+       INT_MOD(bt_hdr->bb_numrecs, ARCH_CONVERT, +1);
+
+       /*
+        * the address macros take a 1-based index, so the freshly
+        * incremented record count addresses the new slot
+        */
+       bt_key = XR_INOBT_KEY_ADDR(mp, bt_hdr,
+                       INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+       bt_ptr = XR_INOBT_PTR_ADDR(mp, bt_hdr,
+                       INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
+
+       /* key = first inode, pointer = block being filled one level down */
+       INT_SET(bt_key->ir_startino, ARCH_CONVERT, startino);
+       INT_SET(*bt_ptr, ARCH_CONVERT, btree_curs->level[level-1].agbno);
+}
+
+/*
+ * Build a fresh AGI header for an AG and write it out.
+ *
+ * The sector is zeroed first, then filled from the arguments: the
+ * inode btree root and depth come from btree_curs, count/freecount
+ * are the AG's inode totals, and first_agino is stored as agi_newino.
+ * agi_dirino and every unlinked bucket are set to NULLAGINO.
+ */
+void
+build_agi(xfs_mount_t *mp, xfs_agnumber_t agno,
+               bt_status_t *btree_curs, xfs_agino_t first_agino,
+               xfs_agino_t count, xfs_agino_t freecount)
+{
+       xfs_buf_t       *bp;
+       xfs_agi_t       *hdr;
+       int             bucket;
+
+       bp = libxfs_getbuf(mp->m_dev,
+                       XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR),
+                       mp->m_sb.sb_sectsize/BBSIZE);
+       hdr = XFS_BUF_TO_AGI(bp);
+       bzero(hdr, mp->m_sb.sb_sectsize);
+
+       /*
+        * identification
+        */
+       INT_SET(hdr->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
+       INT_SET(hdr->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
+       INT_SET(hdr->agi_seqno, ARCH_CONVERT, agno);
+
+       /*
+        * the last AG gets whatever is left of the data blocks,
+        * all others are sb_agblocks long
+        */
+       if (agno >= mp->m_sb.sb_agcount - 1)
+               INT_SET(hdr->agi_length, ARCH_CONVERT, mp->m_sb.sb_dblocks -
+                       (xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno);
+       else
+               INT_SET(hdr->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+
+       /*
+        * inode btree location and inode counts
+        */
+       INT_SET(hdr->agi_root, ARCH_CONVERT, btree_curs->root);
+       INT_SET(hdr->agi_level, ARCH_CONVERT, btree_curs->num_levels);
+       INT_SET(hdr->agi_count, ARCH_CONVERT, count);
+       INT_SET(hdr->agi_freecount, ARCH_CONVERT, freecount);
+       INT_SET(hdr->agi_newino, ARCH_CONVERT, first_agino);
+       INT_SET(hdr->agi_dirino, ARCH_CONVERT, NULLAGINO);
+
+       /*
+        * empty unlinked list in every bucket
+        */
+       bucket = 0;
+       while (bucket < XFS_AGI_UNLINKED_BUCKETS)  {
+               INT_SET(hdr->agi_unlinked[bucket], ARCH_CONVERT, NULLAGINO);
+               bucket++;
+       }
+
+       libxfs_writebuf(bp, 0);
+}
+
+/*
+ * rebuilds an inode tree given a cursor.  We're lazy here and call
+ * the routine that builds the agi
+ *
+ * One block per level is taken from the cursor and given an empty
+ * header; the block taken for the top level is recorded as the root.
+ * The leaf level is then filled from the incore inode records for the
+ * AG, with prop_ino_cursor() adding the matching entries to the
+ * interior levels.  The inode and free-inode totals gathered on the
+ * way, together with the first inode number seen (NULLAGINO if there
+ * are no records), are passed to build_agi().
+ */
+void
+build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
+               bt_status_t *btree_curs)
+{
+       xfs_agnumber_t          i;
+       xfs_agblock_t           j;
+       xfs_agblock_t           agbno;
+       xfs_agino_t             first_agino;
+       xfs_inobt_block_t       *bt_hdr;
+       xfs_inobt_rec_t         *bt_rec;
+       ino_tree_node_t         *ino_rec;
+       bt_stat_level_t         *lptr;
+       xfs_agino_t             count = 0;
+       xfs_agino_t             freecount = 0;
+       int                     inocnt;
+       int                     k;
+       int                     level = btree_curs->num_levels;
+
+       /*
+        * grab the first block of every level and lay in an empty
+        * header, no siblings yet
+        */
+       for (i = 0; i < level; i++)  {
+               lptr = &btree_curs->level[i];
+
+               agbno = get_next_blockaddr(agno, i, btree_curs);
+               lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+
+               /* the block of the topmost level is the root */
+               if (i == btree_curs->num_levels - 1)
+                       btree_curs->root = agbno;
+
+               lptr->agbno = agbno;
+               lptr->prev_agbno = NULLAGBLOCK;
+               lptr->prev_buf_p = NULL;
+               /*
+                * initialize block header
+                */
+               bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+               bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+               INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+               INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i);
+               INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT,
+                               bt_hdr->bb_rightsib = NULLAGBLOCK);
+               INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
+       }
+       /*
+        * run along leaf, setting up records.  as we have to switch
+        * blocks, call the prop_ino_cursor routine to set up the new
+        * pointers for the parent.  that can recurse up to the root
+        * if required.  set the sibling pointers for leaf level here.
+        */
+       ino_rec = findfirst_inode_rec(agno);
+
+       if (ino_rec != NULL)
+               first_agino = ino_rec->ino_startnum;
+       else
+               first_agino = NULLAGINO;
+
+       lptr = &btree_curs->level[0];
+
+       for (i = 0; i < lptr->num_blocks; i++)  {
+               /*
+                * block initialization, lay in block header
+                */
+               bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
+               bzero(bt_hdr, mp->m_sb.sb_blocksize);
+
+               INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
+               INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT);
+               INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
+               INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+               /*
+                * this block's share of the records: num_recs_pb, plus
+                * one extra while the remainder (modulo) lasts
+                */
+               INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT,
+                               lptr->num_recs_pb + (lptr->modulo > 0));
+
+               if (lptr->modulo > 0)
+                       lptr->modulo--;
+
+               /*
+                * tell the parent level about the first inode of this
+                * leaf block (skipped when the leaf holds no records)
+                */
+               if (lptr->num_recs_pb > 0)
+                       prop_ino_cursor(mp, agno, btree_curs,
+                                       ino_rec->ino_startnum, 0);
+
+               /* leaf records start right after the block header */
+               bt_rec = (xfs_inobt_rec_t *) ((char *) bt_hdr +
+                                               sizeof(xfs_inobt_block_t));
+               for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) {
+                       ASSERT(ino_rec != NULL);
+                       INT_SET(bt_rec[j].ir_startino, ARCH_CONVERT,
+                                       ino_rec->ino_startnum);
+                       INT_SET(bt_rec[j].ir_free, ARCH_CONVERT,
+                                       ino_rec->ir_free);
+
+                       /*
+                        * count the free inodes of this record, one
+                        * per bit of an xfs_inofree_t
+                        */
+                       inocnt = 0;
+                       for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++)  {
+                               ASSERT(is_inode_confirmed(ino_rec, k));
+                               inocnt += is_inode_free(ino_rec, k);
+                       }
+
+                       INT_SET(bt_rec[j].ir_freecount, ARCH_CONVERT, inocnt);
+                       freecount += inocnt;
+                       count += XFS_INODES_PER_CHUNK;
+                       ino_rec = next_ino_rec(ino_rec);
+               }
+
+               if (ino_rec != NULL)  {
+                       /*
+                        * get next leaf level block
+                        */
+                       if (lptr->prev_buf_p != NULL)  {
+#ifdef XR_BLD_INO_TRACE
+                               fprintf(stderr, "writing inobt agbno %u\n",
+                                       lptr->prev_agbno);
+#endif
+                               ASSERT(lptr->prev_agbno != NULLAGBLOCK);
+                               libxfs_writebuf(lptr->prev_buf_p, 0);
+                       }
+                       /* the block just filled becomes the "previous" one */
+                       lptr->prev_buf_p = lptr->buf_p;
+                       lptr->prev_agbno = lptr->agbno;
+
+                       /*
+                        * the new block's address goes both into the
+                        * cursor and into the right sibling pointer of
+                        * the block just filled
+                        */
+                       INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno=
+                               get_next_blockaddr(agno, 0, btree_curs));
+
+                       lptr->buf_p = libxfs_getbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
+                                       XFS_FSB_TO_BB(mp, 1));
+               }
+       }
+
+       build_agi(mp, agno, btree_curs, first_agino, count, freecount);
+}
+
+/*
+ * build both the agf and the agfl for an agno given both
+ * btree cursors
+ *
+ * The AGF sector is zeroed and filled with the roots and depths of
+ * the two freespace btrees, the free block count and the longest
+ * free extent.  Blocks still unused in either cursor are moved onto
+ * the AGFL (by-block cursor first) until XFS_AGFL_SIZE entries are
+ * used; whatever is left after that is dropped and reported with
+ * do_warn().  lostblocks is the number of blocks the caller expected
+ * to be dropped and is only used to choose the warning text.
+ */
+void
+build_agf_agfl(xfs_mount_t     *mp,
+               xfs_agnumber_t  agno,
+               bt_status_t     *bno_bt,
+               bt_status_t     *bcnt_bt,
+               xfs_extlen_t    freeblks,       /* # free blocks in tree */
+               int             lostblocks)     /* # blocks that will be lost */
+{
+       extent_tree_node_t      *ext_ptr;
+       xfs_buf_t               *agf_buf, *agfl_buf;
+       int                     i;      /* # blocks put on the AGFL */
+       int                     j;      /* # blocks thrown away */
+       xfs_agfl_t              *agfl;
+       xfs_agf_t               *agf;
+
+       agf_buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR),
+                               mp->m_sb.sb_sectsize/BBSIZE);
+       agf = XFS_BUF_TO_AGF(agf_buf);
+       bzero(agf, mp->m_sb.sb_sectsize);
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "agf = 0x%x, agf_buf->b_un.b_addr = 0x%x\n",
+               (__psint_t) agf, (__psint_t) agf_buf->b_un.b_addr);
+#endif
+
+       /*
+        * set up fixed part of agf
+        */
+       INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC);
+       INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION);
+       INT_SET(agf->agf_seqno, ARCH_CONVERT, agno);
+
+       /*
+        * every AG but the last is sb_agblocks long, the last one
+        * gets the remainder of the data blocks
+        */
+       if (agno < mp->m_sb.sb_agcount - 1)
+               INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
+       else
+               INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_dblocks -
+                       (xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno);
+
+       INT_SET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT, bno_bt->root);
+       INT_SET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT,
+                       bno_bt->num_levels);
+       INT_SET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT, bcnt_bt->root);
+       INT_SET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT,
+                       bcnt_bt->num_levels);
+       INT_SET(agf->agf_freeblks, ARCH_CONVERT, freeblks);
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "bno root = %u, bcnt root = %u, indices = %u %u\n",
+                       INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT),
+                       INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT),
+                       XFS_BTNUM_BNO,
+                       XFS_BTNUM_CNT);
+#endif
+
+       /*
+        * do we have left-over blocks in the btree cursors that should
+        * be used to fill the AGFL?
+        */
+       if (bno_bt->num_free_blocks > 0 || bcnt_bt->num_free_blocks > 0)  {
+               /*
+                * yes - grab the AGFL buffer
+                */
+               agfl_buf = libxfs_getbuf(mp->m_dev,
+                               XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR),
+                               mp->m_sb.sb_sectsize/BBSIZE);
+               agfl = XFS_BUF_TO_AGFL(agfl_buf);
+               bzero(agfl, mp->m_sb.sb_sectsize);
+               /*
+                * ok, now grab as many blocks as we can
+                */
+               i = j = 0;
+               while (bno_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE)  {
+                       INT_SET(agfl->agfl_bno[i], ARCH_CONVERT,
+                               get_next_blockaddr(agno, 0, bno_bt));
+                       i++;
+               }
+
+               while (bcnt_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE)  {
+                       INT_SET(agfl->agfl_bno[i], ARCH_CONVERT,
+                               get_next_blockaddr(agno, 0, bcnt_bt));
+                       i++;
+               }
+               /*
+                * now throw the rest of the blocks away and complain
+                */
+               while (bno_bt->num_free_blocks > 0)  {
+                       (void) get_next_blockaddr(agno, 0, bno_bt);
+                       j++;
+               }
+               while (bcnt_bt->num_free_blocks > 0)  {
+                       (void) get_next_blockaddr(agno, 0, bcnt_bt);
+                       j++;
+               }
+
+               if (j > 0)  {
+                       if (j == lostblocks)
+                               do_warn("lost %d blocks in ag %u\n", j, agno);
+                       else
+                               /*
+                                * argument order follows the format:
+                                * expected loss, ag number, actual loss
+                                */
+                               do_warn("thought we were going to lose %d "
+                                       "blocks in ag %u, actually lost %d\n",
+                                       lostblocks, agno, j);
+               }
+
+               /*
+                * the free list occupies slots 0 .. i-1
+                */
+               INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+               INT_SET(agf->agf_fllast, ARCH_CONVERT, i - 1);
+               INT_SET(agf->agf_flcount, ARCH_CONVERT, i);
+
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "writing agfl for ag %u\n", agno);
+#endif
+
+               libxfs_writebuf(agfl_buf, 0);
+       } else  {
+               /*
+                * nothing left over -- describe an empty free list
+                */
+               INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
+               INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1);
+               INT_ZERO(agf->agf_flcount, ARCH_CONVERT);
+       }
+
+       ext_ptr = findbiggest_bcnt_extent(agno);
+       INT_SET(agf->agf_longest, ARCH_CONVERT,
+                       (ext_ptr != NULL) ? ext_ptr->ex_blockcount : 0);
+
+       /* the two freespace trees must not share a root block */
+       ASSERT(INT_GET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT) !=
+               INT_GET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT));
+
+       libxfs_writebuf(agf_buf, 0);
+
+#ifdef XR_BLD_FREE_TRACE
+       /* no error value is tracked in this function, so none is printed */
+       fprintf(stderr, "wrote agf for ag %u\n", agno);
+#endif
+}
+
+/*
+ * Bring the superblock up to date: copy the counters accumulated in
+ * sb_icount, sb_ifree, sb_fdblocks and sb_frextents into the incore
+ * superblock, let update_sb_version() adjust the version/feature
+ * information, then copy the incore superblock into the superblock
+ * buffer, translate it with libxfs_xlate_sb() and write the buffer.
+ */
+void
+sync_sb(xfs_mount_t *mp)
+{
+       xfs_buf_t       *sb_buf = libxfs_getsb(mp, 0);
+       xfs_sb_t        *disk_sb;
+
+       if (sb_buf == NULL)
+               do_error("couldn't get superblock\n");
+
+       disk_sb = XFS_BUF_TO_SBP(sb_buf);
+
+       /*
+        * inode counters first, then the free space counters
+        */
+       mp->m_sb.sb_ifree = sb_ifree;
+       mp->m_sb.sb_icount = sb_icount;
+       mp->m_sb.sb_frextents = sb_frextents;
+       mp->m_sb.sb_fdblocks = sb_fdblocks;
+
+       update_sb_version(mp);
+
+       /*
+        * incore -> buffer, then translate all fields in the buffer
+        */
+       *disk_sb = mp->m_sb;
+       libxfs_xlate_sb(XFS_BUF_PTR(sb_buf), disk_sb, -1, ARCH_CONVERT,
+                       XFS_SB_ALL_BITS);
+       libxfs_writebuf(sb_buf, 0);
+}
+
+/*
+ * make sure the root and realtime inodes show up allocated
+ * even if they've been freed.  they get reinitialized in phase6.
+ *
+ * Looks up the incore inode record holding the root inode and marks
+ * the first three inodes of that record as used.
+ */
+void
+keep_fsinos(xfs_mount_t *mp)
+{
+       ino_tree_node_t         *root_chunk;
+       int                     slot = 0;
+
+       root_chunk = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+                       XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+       while (slot < 3)  {
+               set_inode_used(root_chunk, slot);
+               slot++;
+       }
+}
+
+/*
+ * Phase 5 driver: for every allocation group, build the incore
+ * freespace trees, size and rebuild the on-disk inode btree and the
+ * two freespace btrees (by block number and by size), and write new
+ * AGF, AGFL and AGI headers.  The inode and free block totals are
+ * accumulated in sb_icount, sb_ifree and sb_fdblocks along the way.
+ * Afterwards the realtime bitmap/summary information is regenerated
+ * if the filesystem has realtime blocks, and the superblock is
+ * synced with sync_sb().
+ */
+void
+phase5(xfs_mount_t *mp)
+{
+       __uint64_t      num_inos;
+       __uint64_t      num_free_inos;
+       bt_status_t     bno_btree_curs;
+       bt_status_t     bcnt_btree_curs;
+       bt_status_t     ino_btree_curs;
+       xfs_agnumber_t  agno;
+       int             extra_blocks = 0;
+       uint            num_freeblocks;
+       xfs_extlen_t    freeblks1;
+       xfs_extlen_t    freeblks2;
+       xfs_agblock_t   num_extents;
+       extern int      count_bno_extents(xfs_agnumber_t);
+       extern int      count_bno_extents_blocks(xfs_agnumber_t, uint *);
+#ifdef XR_BLD_FREE_TRACE
+       extern int      count_bcnt_extents(xfs_agnumber_t);
+#endif
+
+       do_log("Phase 5 - rebuild AG headers and trees...\n");
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)
+               );
+       fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1)
+               );
+       fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n",
+               XR_INOBT_BLOCK_MAXRECS(mp, 0));
+       fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n",
+               XR_INOBT_BLOCK_MAXRECS(mp, 1));
+       fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+#endif
+
+       /*
+        * make sure the root and realtime inodes show up allocated
+        */
+       keep_fsinos(mp);
+
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+               /*
+                * build up incore bno and bcnt extent btrees
+                */
+               num_extents = mk_incore_fstree(mp, agno);
+
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "# of bno extents is %d\n",
+                               count_bno_extents(agno));
+#endif
+
+               if (num_extents == 0)  {
+                       /*
+                        * XXX - what we probably should do here is pick an
+                        * inode for a regular file in the allocation group
+                        * that has space allocated and shoot it by traversing
+                        * the bmap list and putting all its extents on the
+                        * incore freespace trees, clearing the inode,
+                        * and clearing the in-use bit in the incore inode
+                        * tree.  Then try mk_incore_fstree() again.
+                        */
+                       do_error("unable to rebuild AG %u.  "
+                               "Not enough free space in on-disk AG.\n", agno);
+               }
+
+               /*
+                * done with the AG bitmap, toss it...
+                */
+               teardown_ag_bmap(mp, agno);
+
+               /*
+                * ok, now set up the btree cursors for the
+                * on-disk btrees (includes pre-allocating all
+                * required blocks for the trees themselves)
+                */
+               init_ino_cursor(mp, agno, &ino_btree_curs,
+                               &num_inos, &num_free_inos);
+
+               sb_icount += num_inos;
+               sb_ifree += num_free_inos;
+
+               /*
+                * recount the free extents and blocks now that the
+                * inode cursor has been set up
+                */
+               num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
+               /*
+                * lose two blocks per AG -- the space tree roots
+                * are counted as allocated since the space trees
+                * always have roots
+                */
+               sb_fdblocks += num_freeblocks - 2;
+
+               if (num_extents == 0)  {
+                       /*
+                        * XXX - what we probably should do here is pick an
+                        * inode for a regular file in the allocation group
+                        * that has space allocated and shoot it by traversing
+                        * the bmap list and putting all its extents on the
+                        * incore freespace trees, clearing the inode,
+                        * and clearing the in-use bit in the incore inode
+                        * tree.  Then try mk_incore_fstree() again.
+                        */
+                       do_error("unable to rebuild AG %u.  No free space.\n",
+                               agno);
+                       exit(1);
+               }
+
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "# of bno extents is %d\n", num_extents);
+#endif
+
+               /*
+                * track blocks that we might really lose
+                */
+               extra_blocks = calculate_freespace_cursor(mp, agno,
+                                       &num_extents, &bno_btree_curs);
+
+               /*
+                * freespace btrees live in the "free space" but
+                * the filesystem treats AGFL blocks as allocated
+                * since they aren't described by the freespace trees
+                */
+
+               /*
+                * see if we can fit all the extra blocks into the AGFL
+                *
+                * NOTE(review): extra_blocks is an int; if XFS_AGFL_SIZE
+                * expands to an unsigned expression the subtraction and
+                * the "> 0" test below are done in unsigned arithmetic
+                * and would not clamp at zero -- confirm against the
+                * definition of XFS_AGFL_SIZE.
+                */
+               extra_blocks = (extra_blocks - XFS_AGFL_SIZE > 0)
+                               ? extra_blocks - XFS_AGFL_SIZE
+                               : 0;
+
+               if (extra_blocks > 0)  {
+                       do_warn("lost %d blocks in agno %d, sorry.\n",
+                               extra_blocks, agno);
+                       sb_fdblocks -= extra_blocks;
+               }
+
+               /*
+                * the by-size tree starts from a copy of the by-block
+                * cursor; each cursor is then set up separately
+                */
+               bcnt_btree_curs = bno_btree_curs;
+
+               setup_cursor(mp, agno, &bno_btree_curs);
+               setup_cursor(mp, agno, &bcnt_btree_curs);
+
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "# of bno extents is %d\n",
+                               count_bno_extents(agno));
+               fprintf(stderr, "# of bcnt extents is %d\n",
+                               count_bcnt_extents(agno));
+#endif
+               /*
+                * now rebuild the freespace trees
+                */
+               freeblks1 = build_freespace_tree(mp, agno, &bno_btree_curs,
+                                       XFS_ABTB_MAGIC);
+#ifdef XR_BLD_FREE_TRACE
+               fprintf(stderr, "# of free blocks == %d\n", freeblks1);
+#endif
+               write_cursor(&bno_btree_curs);
+
+               freeblks2 = build_freespace_tree(mp, agno, &bcnt_btree_curs,
+                                       XFS_ABTC_MAGIC);
+               write_cursor(&bcnt_btree_curs);
+
+               /* both trees must account for the same number of blocks */
+               ASSERT(freeblks1 == freeblks2);
+
+               /*
+                * set up agf and agfl
+                */
+               build_agf_agfl(mp, agno, &bno_btree_curs,
+                               &bcnt_btree_curs, freeblks1, extra_blocks);
+               /*
+                * build inode allocation tree.  this also builds the agi
+                */
+               build_ino_tree(mp, agno, &ino_btree_curs);
+               write_cursor(&ino_btree_curs);
+               /*
+                * tear down cursors
+                */
+               finish_cursor(&bno_btree_curs);
+               finish_cursor(&ino_btree_curs);
+               finish_cursor(&bcnt_btree_curs);
+               /*
+                * release the incore per-AG bno/bcnt trees so
+                * the extent nodes can be recycled
+                */
+               release_agbno_extent_tree(agno);
+               release_agbcnt_extent_tree(agno);
+       }
+
+       /*
+        * realtime information is only regenerated when the
+        * superblock says there are realtime blocks
+        */
+       if (mp->m_sb.sb_rblocks)  {
+               do_log(
+               "        - generate realtime summary info and bitmap...\n");
+               rtinit(mp);
+               generate_rtinfo(mp, btmcompute, sumcompute);
+               teardown_rt_bmap(mp);
+       }
+
+       do_log("        - reset superblock...\n");
+
+       /*
+        * sync superblock counter and set version bits correctly
+        */
+       sync_sb(mp);
+
+       /* bad_ino_btree is declared outside this function; reset it here */
+       bad_ino_btree = 0;
+}
diff --git a/repair/phase6.c b/repair/phase6.c
new file mode 100644 (file)
index 0000000..1babc07
--- /dev/null
@@ -0,0 +1,3971 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "dir.h"
+#include "dir2.h"
+#include "dir_stack.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "versions.h"
+
+static cred_t zerocr;          /* file-scope static, so zero-filled unless assigned; passed as the credentials to libxfs_inode_alloc() in mk_orphanage() */
+static int orphanage_entered;  /* set by mk_orphanage(): 1 if the ORPHANAGE name was entered in the root directory, 0 if dir_createname() failed */
+
+/*
+ * Data structures and routines to keep track of directory entries
+ * and whether their leaf entry has been seen
+ *
+ * Each directory data entry is recorded with dir_hash_add() as a
+ * dir_hash_ent_t chained into the bucket chosen by
+ * DIR_HASH_FUNC(table, address).  dir_hash_see() later marks the entry
+ * as seen when the matching leaf entry turns up.
+ *
+ * DIR_HASH_TAB_SIZE(n) is the number of bytes needed for a
+ * dir_hash_tab_t with n buckets; DIR_HASH_FUNC(t,a) maps an address
+ * to a bucket index by taking it modulo the table size.
+ */
+typedef struct dir_hash_ent {
+       struct dir_hash_ent     *next;  /* pointer to next entry in the same bucket */
+       xfs_dir2_leaf_entry_t   ent;    /* address and hash value (hashval is only set when junkit == 0) */
+       short                   junkit; /* name starts with /; hash value is then not compared */
+       short                   seen;   /* have seen leaf entry (set by dir_hash_see) */
+} dir_hash_ent_t;
+
+typedef struct dir_hash_tab {
+       int                     size;   /* size of hash table (number of buckets) */
+       dir_hash_ent_t          *tab[1];/* actual hash table, variable size; allocated via DIR_HASH_TAB_SIZE */
+} dir_hash_tab_t;
+#define        DIR_HASH_TAB_SIZE(n)    \
+       (offsetof(dir_hash_tab_t, tab) + (sizeof(dir_hash_ent_t *) * (n)))
+#define        DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
+
+/*
+ * Track the contents of the freespace table in a directory.
+ *
+ * ents[] is declared with one element but is meant to be over-allocated:
+ * FREETAB_SIZE(n) gives the number of bytes needed for a freetab_t
+ * holding n entries.
+ *
+ * NOTE(review): the users of this structure are outside this part of
+ * the file; the field descriptions below are assumptions to confirm.
+ */
+typedef struct freetab {
+       int                     naents; /* presumably number of ents[] slots allocated -- TODO confirm */
+       int                     nents;  /* presumably number of ents[] slots in use -- TODO confirm */
+       struct freetab_ent {
+               xfs_dir2_data_off_t     v;      /* presumably a free-space value for one data block -- TODO confirm */
+               short                   s;      /* presumably a per-entry status flag -- TODO confirm */
+       } ents[1];                      /* variable length, see FREETAB_SIZE */
+} freetab_t;
+#define        FREETAB_SIZE(n) \
+       (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n)))
+
+#define        DIR_HASH_CK_OK          0       /* no problem; these codes also index seevalstr[] in dir_hash_check() */
+#define        DIR_HASH_CK_DUPLEAF     1       /* a leaf entry referred to a data entry that was already seen */
+#define        DIR_HASH_CK_BADHASH     2       /* leaf hash value differs from the one recorded for the data entry */
+#define        DIR_HASH_CK_NODATA      3       /* leaf entry address has no matching data entry in the table */
+#define        DIR_HASH_CK_NOLEAF      4       /* some data entry was never matched by a leaf entry */
+#define        DIR_HASH_CK_BADSTALE    5       /* number of null-address leaf entries differs from the stale count */
+
+/*
+ * Record one directory data entry in the hash table so that leaf
+ * entries can be matched against it later by dir_hash_see().
+ *
+ * hashtab - table created by dir_hash_init()
+ * hash    - hash value of the entry; stored only when junk is zero
+ * addr    - dataptr address of the entry; selects the bucket and is
+ *           the key used for later lookups
+ * junk    - non-zero marks the entry as junk, in which case no hash
+ *           value is recorded (and none is compared on lookup)
+ *
+ * The new entry is pushed on the front of its bucket chain with
+ * seen == 0.  An allocation failure is reported through do_error().
+ */
+static void
+dir_hash_add(
+       dir_hash_tab_t          *hashtab,
+       xfs_dahash_t            hash,
+       xfs_dir2_dataptr_t      addr,
+       int                     junk)
+{
+       int                     i;
+       dir_hash_ent_t          *p;
+
+       i = DIR_HASH_FUNC(hashtab, addr);
+       if ((p = malloc(sizeof(*p))) == NULL) {
+               /*
+                * sizeof yields a size_t, which is wider than unsigned int
+                * on LP64 platforms; cast so the argument really matches
+                * the %u conversion.
+                */
+               do_error("malloc failed in dir_hash_add (%u bytes)\n",
+                       (unsigned int)sizeof(*p));
+               exit(1);
+       }
+       p->next = hashtab->tab[i];
+       hashtab->tab[i] = p;
+       /* junk entries carry no meaningful hash value */
+       if (!(p->junkit = junk))
+               p->ent.hashval = hash;
+       p->ent.address = addr;
+       p->seen = 0;
+}
+
+/*
+ * Returns 1 if at least one entry in the table has not been marked
+ * seen, 0 if every entry has been seen (or the table is empty).
+ */
+static int
+dir_hash_unseen(
+       dir_hash_tab_t  *hashtab)
+{
+       dir_hash_ent_t  *ent;
+       int             bucket;
+
+       bucket = 0;
+       while (bucket < hashtab->size) {
+               ent = hashtab->tab[bucket];
+               while (ent != NULL) {
+                       if (!ent->seen)
+                               return 1;
+                       ent = ent->next;
+               }
+               bucket++;
+       }
+       return 0;
+}
+
+/*
+ * Decide whether the hash table of directory ip needs rebuilding.
+ *
+ * seeval is a DIR_HASH_CK_* code from the leaf walk.  A clean walk is
+ * still turned into DIR_HASH_CK_NOLEAF when some data entry was never
+ * matched.  Returns 0 if everything is fine; otherwise warns (naming
+ * the inode and the reason) and returns 1.
+ */
+static int
+dir_hash_check(
+       dir_hash_tab_t  *hashtab,
+       xfs_inode_t     *ip,
+       int             seeval)
+{
+       /* messages indexed by the DIR_HASH_CK_* codes */
+       static char     *seevalstr[] = {
+               "ok",
+               "duplicate leaf",
+               "hash value mismatch",
+               "no data entry",
+               "no leaf entry",
+               "bad stale count",
+       };
+
+       if (seeval == DIR_HASH_CK_OK) {
+               if (!dir_hash_unseen(hashtab))
+                       return 0;
+               seeval = DIR_HASH_CK_NOLEAF;
+       }
+
+       do_warn("bad hash table for directory inode %llu (%s): ", ip->i_ino,
+               seevalstr[seeval]);
+       if (no_modify)
+               do_warn("would rebuild\n");
+       else
+               do_warn("rebuilding\n");
+       return 1;
+}
+
+/*
+ * Free every entry chained into the table, then the table itself.
+ */
+static void
+dir_hash_done(
+       dir_hash_tab_t  *hashtab)
+{
+       dir_hash_ent_t  *ent;
+       dir_hash_ent_t  *following;
+       int             bucket;
+
+       for (bucket = 0; bucket < hashtab->size; bucket++) {
+               ent = hashtab->tab[bucket];
+               while (ent) {
+                       /* grab the link before the entry goes away */
+                       following = ent->next;
+                       free(ent);
+                       ent = following;
+               }
+       }
+       free(hashtab);
+}
+
+/*
+ * Allocate a zeroed hash table sized from the directory size: one
+ * bucket per 64 bytes, but never fewer than 16 nor more than 1024
+ * buckets.  An allocation failure is reported through do_error().
+ */
+static dir_hash_tab_t *
+dir_hash_init(
+       xfs_fsize_t     size)
+{
+       dir_hash_tab_t  *tab;
+       int             nbuckets;
+
+       nbuckets = size / (16 * 4);
+       if (nbuckets < 16)
+               nbuckets = 16;
+       else if (nbuckets > 1024)
+               nbuckets = 1024;
+
+       tab = calloc(DIR_HASH_TAB_SIZE(nbuckets), 1);
+       if (tab == NULL) {
+               do_error("calloc failed in dir_hash_init\n");
+               exit(1);
+       }
+       tab->size = nbuckets;
+       return tab;
+}
+
+/*
+ * Match one leaf entry (hash, addr) against the recorded data entries.
+ *
+ * Returns DIR_HASH_CK_NODATA if no entry has that address,
+ * DIR_HASH_CK_DUPLEAF if the entry was already seen,
+ * DIR_HASH_CK_BADHASH if a non-junk entry has a different hash value,
+ * and otherwise marks the entry seen and returns DIR_HASH_CK_OK.
+ */
+static int
+dir_hash_see(
+       dir_hash_tab_t          *hashtab,
+       xfs_dahash_t            hash,
+       xfs_dir2_dataptr_t      addr)
+{
+       dir_hash_ent_t          *ent;
+
+       /* walk the bucket until the first entry with this address */
+       ent = hashtab->tab[DIR_HASH_FUNC(hashtab, addr)];
+       while (ent != NULL && ent->ent.address != addr)
+               ent = ent->next;
+
+       if (ent == NULL)
+               return DIR_HASH_CK_NODATA;
+       if (ent->seen)
+               return DIR_HASH_CK_DUPLEAF;
+       if (!ent->junkit && ent->ent.hashval != hash)
+               return DIR_HASH_CK_BADHASH;
+
+       ent->seen = 1;
+       return DIR_HASH_CK_OK;
+}
+
+/*
+ * Run dir_hash_see() over an array of count leaf entries.
+ *
+ * Entries whose address is XFS_DIR2_NULL_DATAPTR are only counted.
+ * The first failing dir_hash_see() code is returned as is; if all
+ * lookups succeed, the result is DIR_HASH_CK_OK when the number of
+ * null-address entries equals stale and DIR_HASH_CK_BADSTALE otherwise.
+ */
+static int
+dir_hash_see_all(
+       dir_hash_tab_t          *hashtab,
+       xfs_dir2_leaf_entry_t   *ents,
+       int                     count,
+       int                     stale)
+{
+       xfs_dir2_dataptr_t      addr;
+       int                     idx;
+       int                     nstale;
+       int                     status;
+
+       nstale = 0;
+       for (idx = 0; idx < count; idx++) {
+               addr = INT_GET(ents[idx].address, ARCH_CONVERT);
+               if (addr == XFS_DIR2_NULL_DATAPTR) {
+                       nstale++;
+                       continue;
+               }
+               status = dir_hash_see(hashtab,
+                               INT_GET(ents[idx].hashval, ARCH_CONVERT), addr);
+               if (status != DIR_HASH_CK_OK)
+                       return status;
+       }
+
+       if (nstale != stale)
+               return DIR_HASH_CK_BADSTALE;
+       return DIR_HASH_CK_OK;
+}
+
+
+/*
+ * Version 1 or 2 directory routine wrappers
+*/
+/*
+ * Calls libxfs_dir2_init() when the superblock has the version 2
+ * directory feature, libxfs_dir_init() otherwise.
+ */
+static void
+dir_init(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, xfs_inode_t *pdp)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               libxfs_dir_init(tp, dp, pdp);
+               return;
+       }
+       libxfs_dir2_init(tp, dp, pdp);
+}
+
+/*
+ * Forwards to libxfs_dir2_createname() or libxfs_dir_createname()
+ * depending on the superblock's directory version and returns that
+ * routine's result.
+ */
+static int
+dir_createname(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *pip,
+               char *name, int namelen, xfs_ino_t inum, xfs_fsblock_t *first,
+               xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               return libxfs_dir_createname(tp, pip, name, namelen,
+                               inum, first, flist, total);
+       }
+       return libxfs_dir2_createname(tp, pip, name, namelen,
+                       inum, first, flist, total);
+}
+
+/*
+ * Forwards to libxfs_dir2_lookup() or libxfs_dir_lookup() depending on
+ * the superblock's directory version and returns that routine's result.
+ */
+static int
+dir_lookup(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+               int namelen, xfs_ino_t *inum)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               return libxfs_dir_lookup(tp, dp, name, namelen, inum);
+       }
+       return libxfs_dir2_lookup(tp, dp, name, namelen, inum);
+}
+
+/*
+ * Forwards to libxfs_dir2_replace() or libxfs_dir_replace() depending
+ * on the superblock's directory version and returns that routine's
+ * result.
+ */
+static int
+dir_replace(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+               int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+               xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               return libxfs_dir_replace(tp, dp, name, namelen, inum,
+                               firstblock, flist, total);
+       }
+       return libxfs_dir2_replace(tp, dp, name, namelen, inum,
+                       firstblock, flist, total);
+}
+
+/*
+ * Forwards to libxfs_dir2_removename() or libxfs_dir_removename()
+ * depending on the superblock's directory version and returns that
+ * routine's result.
+ */
+static int
+dir_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name,
+               int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+               xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               return libxfs_dir_removename(tp, dp, name, namelen, inum,
+                               firstblock, flist, total);
+       }
+       return libxfs_dir2_removename(tp, dp, name, namelen, inum,
+                       firstblock, flist, total);
+}
+
+/*
+ * Forwards to libxfs_dir2_bogus_removename() or
+ * libxfs_dir_bogus_removename() depending on the superblock's directory
+ * version and returns that routine's result.
+ */
+static int
+dir_bogus_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp,
+               char *name, xfs_fsblock_t *firstblock, xfs_bmap_free_t *flist,
+               xfs_extlen_t total, xfs_dahash_t hashval, int namelen)
+{
+       if (!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) {
+               return libxfs_dir_bogus_removename(tp, dp, name, firstblock,
+                               flist, total, hashval, namelen);
+       }
+       return libxfs_dir2_bogus_removename(tp, dp, name, firstblock,
+                       flist, total, hashval, namelen);
+}
+
+
+/*
+ * Report a failed transaction reservation: a dedicated out-of-space
+ * message for ENOSPC, the raw error number for anything else.
+ */
+static void
+res_failed(
+       int     err)
+{
+       if (err != ENOSPC) {
+               do_error("xfs_trans_reserve returned %d\n", err);
+               return;
+       }
+       do_error("ran out of disk space!\n");
+}
+
+/*
+ * Re-create the realtime bitmap inode (sb_rbmino).
+ *
+ * A first transaction resets the in-core inode to an empty regular
+ * file in extents format whose size is sb_rbmblocks * sb_blocksize.
+ * A second transaction allocates sb_rbmblocks blocks for it and zeroes
+ * every returned extent on the device.
+ */
+void
+mk_rbmino(xfs_mount_t *mp)
+{
+       xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
+       xfs_bmap_free_t flist;
+       xfs_fsblock_t   first;
+       xfs_dfiloff_t   bno;
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip;
+       int             committed;
+       int             error;
+       int             nmap;
+       int             n;
+
+       /*
+        * first set up inode
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+
+       error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0);
+       if (error)
+               res_failed(error);
+
+       error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
+       if (error) {
+               do_error("couldn't iget realtime bitmap inode -- error - %d\n",
+                       error);
+       }
+
+       /* start from an all-zero inode core and fill in what matters */
+       bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+       ip->i_d.di_magic = XFS_DINODE_MAGIC;
+       ip->i_d.di_version = XFS_DINODE_VERSION_1;
+       ip->i_d.di_mode = IFREG;
+       ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+       ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+       ip->i_d.di_nlink = 1;           /* account for sb ptr */
+       ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+
+       /*
+        * now the ifork: no extents yet
+        */
+       ip->i_df.if_flags = XFS_IFEXTENTS;
+       ip->i_df.if_real_bytes = 0;
+       ip->i_df.if_bytes = 0;
+       ip->i_df.if_u1.if_extents = NULL;
+
+       /*
+        * commit changes
+        */
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       libxfs_trans_ihold(tp, ip);
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, NULL);
+
+       /*
+        * then allocate blocks for file and fill with zeroes (stolen
+        * from mkfs)
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+       error = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks +
+                       (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0);
+       if (error)
+               res_failed(error);
+
+       libxfs_trans_ijoin(tp, ip, 0);
+       XFS_BMAP_INIT(&flist, &first);
+       bno = 0;
+       while (bno < mp->m_sb.sb_rbmblocks) {
+               nmap = XFS_BMAP_MAX_NMAP;
+               error = libxfs_bmapi(tp, ip, bno,
+                         (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
+                         XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+                         map, &nmap, &flist);
+               if (error) {
+                       do_error("couldn't allocate realtime bitmap - err %d\n",
+                               error);
+               }
+               /* zero each extent handed back and advance past it */
+               for (n = 0; n < nmap; n++) {
+                       libxfs_device_zero(mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, map[n].br_startblock),
+                               XFS_FSB_TO_BB(mp, map[n].br_blockcount));
+                       bno += map[n].br_blockcount;
+               }
+       }
+
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               do_error(
+               "allocation of the realtime bitmap failed, error = %d\n",
+                       error);
+       }
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * Copy the computed realtime bitmap (btmcompute) into the blocks of
+ * the realtime bitmap inode, one filesystem block at a time.  Every
+ * buffer is logged in a single transaction that is committed once all
+ * sb_rbmblocks blocks have been written.
+ *
+ * Returns 0 on success, 1 if a bitmap block could not be read.
+ *
+ * NOTE(review): "first" is handed to libxfs_bmapi() without ever being
+ * initialised, and the early return leaves the transaction neither
+ * committed nor cancelled -- confirm both are acceptable.
+ */
+int
+fill_rbmino(xfs_mount_t *mp)
+{
+       xfs_bmbt_irec_t map;
+       xfs_fsblock_t   first;
+       xfs_dfiloff_t   bno;
+       xfs_rtword_t    *bmp;
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip;
+       xfs_buf_t       *bp;
+       int             error;
+       int             nmap;
+
+       bmp = btmcompute;
+
+       tp = libxfs_trans_alloc(mp, 0);
+
+       error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0);
+       if (error)
+               res_failed(error);
+
+       error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip);
+       if (error) {
+               do_error("couldn't iget realtime bitmap inode -- error - %d\n",
+                       error);
+       }
+
+       /*
+        * fill the file one block at a time
+        */
+       for (bno = 0; bno < mp->m_sb.sb_rbmblocks; bno++)  {
+               nmap = 1;
+               error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+                                       &first, 1, &map, &nmap, NULL);
+               if (error || nmap != 1) {
+                       do_error(
+                       "couldn't map realtime bitmap block %llu - err %d\n",
+                               bno, error);
+               }
+
+               ASSERT(map.br_startblock != HOLESTARTBLOCK);
+
+               error = libxfs_trans_read_buf(
+                               mp, tp, mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, map.br_startblock),
+                               XFS_FSB_TO_BB(mp, 1), 1, &bp);
+               if (error) {
+                       do_warn(
+       "can't access block %llu (fsbno %llu) of realtime bitmap inode %llu\n",
+                               bno, map.br_startblock, mp->m_sb.sb_rbmino);
+                       return 1;
+               }
+
+               /* overwrite the whole block and log all of it */
+               bcopy(bmp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+               libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+
+               /* step the source pointer forward by one block of bytes */
+               bmp = (xfs_rtword_t *)((__psint_t) bmp + mp->m_sb.sb_blocksize);
+       }
+
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+       return 0;
+}
+
+/*
+ * Copy the computed realtime summary (sumcompute) into the blocks of
+ * the realtime summary inode, one filesystem block at a time, for
+ * m_rsumsize >> sb_blocklog blocks.  Every buffer is logged in a single
+ * transaction that is committed at the end.
+ *
+ * Returns 0 on success, 1 if a summary block could not be read.
+ *
+ * NOTE(review): "first" is handed to libxfs_bmapi() without ever being
+ * initialised, and the early return leaves the transaction neither
+ * committed nor cancelled -- confirm both are acceptable.
+ */
+int
+fill_rsumino(xfs_mount_t *mp)
+{
+       xfs_bmbt_irec_t map;
+       xfs_fsblock_t   first;
+       xfs_dfiloff_t   bno;
+       xfs_dfiloff_t   end_bno;
+       xfs_suminfo_t   *smp;
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip;
+       xfs_buf_t       *bp;
+       int             error;
+       int             nmap;
+
+       smp = sumcompute;
+       end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+
+       tp = libxfs_trans_alloc(mp, 0);
+
+       error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0);
+       if (error)
+               res_failed(error);
+
+       error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
+       if (error) {
+               do_error("couldn't iget realtime summary inode -- error - %d\n",
+                       error);
+       }
+
+       /*
+        * fill the file one block at a time
+        */
+       for (bno = 0; bno < end_bno; bno++)  {
+               nmap = 1;
+               error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE,
+                                       &first, 1, &map, &nmap, NULL);
+               if (error || nmap != 1) {
+                       do_error(
+               "couldn't map realtime summary inode block %llu - err %d\n",
+                               bno, error);
+               }
+
+               ASSERT(map.br_startblock != HOLESTARTBLOCK);
+
+               error = libxfs_trans_read_buf(
+                               mp, tp, mp->m_dev,
+                               XFS_FSB_TO_DADDR(mp, map.br_startblock),
+                               XFS_FSB_TO_BB(mp, 1), 1, &bp);
+               if (error) {
+                       do_warn(
+       "can't access block %llu (fsbno %llu) of realtime summary inode %llu\n",
+                               bno, map.br_startblock, mp->m_sb.sb_rsumino);
+                       return 1;
+               }
+
+               /* overwrite the whole block and log all of it */
+               bcopy(smp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize);
+               libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
+
+               /* step the source pointer forward by one block of bytes */
+               smp = (xfs_suminfo_t *)((__psint_t)smp + mp->m_sb.sb_blocksize);
+       }
+
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+       return 0;
+}
+
+/*
+ * Re-create the realtime summary inode (sb_rsumino).
+ *
+ * A first transaction resets the in-core inode to an empty regular
+ * file in extents format whose size is m_rsumsize.  A second
+ * transaction allocates m_rsumsize >> sb_blocklog blocks for it and
+ * zeroes every returned extent on the device.  Failures are reported
+ * through res_failed() / do_error().
+ */
+void
+mk_rsumino(xfs_mount_t *mp)
+{
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip;
+       xfs_bmbt_irec_t *ep;
+       xfs_fsblock_t   first;
+       int             i;
+       int             nmap;
+       int             committed;
+       int             error;
+       int             nsumblocks;
+       xfs_bmap_free_t flist;
+       xfs_dfiloff_t   bno;
+       xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP];
+
+       /*
+        * first set up inode
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+
+       error = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0,
+                               XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
+       if (error)
+               res_failed(error);
+
+       error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip);
+       if (error) {
+               do_error("couldn't iget realtime summary inode -- error - %d\n",
+                       error);
+       }
+
+       bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+       ip->i_d.di_magic = XFS_DINODE_MAGIC;
+       ip->i_d.di_mode = IFREG;
+       ip->i_d.di_version = XFS_DINODE_VERSION_1;
+       ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+       ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+       ip->i_d.di_nlink = 1;           /* account for sb ptr */
+
+       /*
+        * now the ifork
+        */
+       ip->i_df.if_flags = XFS_IFEXTENTS;
+       ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+       ip->i_df.if_u1.if_extents = NULL;
+
+       ip->i_d.di_size = mp->m_rsumsize;
+
+       /*
+        * commit changes
+        */
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       libxfs_trans_ihold(tp, ip);
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+       /*
+        * then allocate blocks for file and fill with zeroes (stolen
+        * from mkfs)
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+
+       nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+       /*
+        * NOTE(review): the block reservation is sized from sb_rbmblocks
+        * although nsumblocks blocks are allocated below; this looks
+        * copied from mk_rbmino -- confirm whether nsumblocks was meant.
+        */
+       error = libxfs_trans_reserve(tp,
+                                 mp->m_sb.sb_rbmblocks +
+                                     (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1),
+                                 BBTOB(128), 0, XFS_TRANS_PERM_LOG_RES,
+                                 XFS_DEFAULT_PERM_LOG_COUNT);
+       if (error)
+               res_failed(error);
+
+       libxfs_trans_ijoin(tp, ip, 0);
+       bno = 0;
+       XFS_BMAP_INIT(&flist, &first);
+       while (bno < nsumblocks) {
+               nmap = XFS_BMAP_MAX_NMAP;
+               error = libxfs_bmapi(tp, ip, bno,
+                         (xfs_extlen_t)(nsumblocks - bno),
+                         XFS_BMAPI_WRITE, &first, nsumblocks,
+                         map, &nmap, &flist);
+               if (error) {
+                       do_error(
+                       "couldn't allocate realtime summary inode - err %d\n",
+                               error);
+               }
+               for (i = 0, ep = map; i < nmap; i++, ep++) {
+                       /*
+                        * The result of the zeroing call is not checked,
+                        * exactly as in mk_rbmino.  This loop used to
+                        * follow the call with an unconditional
+                        * do_error(), which reported a failure even when
+                        * the extent had been zeroed successfully.
+                        */
+                       libxfs_device_zero(mp->m_dev,
+                                     XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+                                     XFS_FSB_TO_BB(mp, ep->br_blockcount));
+                       bno += ep->br_blockcount;
+               }
+       }
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               do_error(
+               "allocation of the realtime summary ino failed, err = %d\n",
+                       error);
+       }
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * Makes a new root directory: resets the in-core root inode
+ * (sb_rootino) to an empty directory with mode 0755, records it in
+ * mp->m_rootip and initialises it as its own parent, all within one
+ * transaction.
+ */
+void
+mk_root_dir(xfs_mount_t *mp)
+{
+       const mode_t    mode = 0755;
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip = NULL;
+       int             error;
+
+       tp = libxfs_trans_alloc(mp, 0);
+
+       error = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0,
+                               XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
+       if (error)
+               res_failed(error);
+
+       error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip);
+       if (error) {
+               do_error("could not iget root inode -- error - %d\n", error);
+       }
+
+       /*
+        * take care of the core -- initialization from xfs_ialloc()
+        */
+       bzero(&ip->i_d, sizeof(xfs_dinode_core_t));
+
+       ip->i_d.di_magic = XFS_DINODE_MAGIC;
+       ip->i_d.di_version = XFS_DINODE_VERSION_1;
+       ip->i_d.di_mode = (__uint16_t) mode|IFDIR;
+       ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+       ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+       ip->i_d.di_nlink = 1;           /* account for . */
+
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       /*
+        * now the ifork: no extents yet
+        */
+       ip->i_df.if_flags = XFS_IFEXTENTS;
+       ip->i_df.if_real_bytes = 0;
+       ip->i_df.if_bytes = 0;
+       ip->i_df.if_u1.if_extents = NULL;
+
+       mp->m_rootip = ip;
+
+       /*
+        * initialize the directory; the root is its own parent
+        */
+       dir_init(mp, tp, ip, ip);
+
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * orphanage name == lost+found
+ *
+ * Allocates a new directory inode (mode 0755, uid 0, gid 0), tries to
+ * enter it in the root directory under the name ORPHANAGE and returns
+ * its inode number.  orphanage_entered records whether the name could
+ * be created; a failure there is only warned about.
+ */
+xfs_ino_t
+mk_orphanage(xfs_mount_t *mp)
+{
+       const int       mode = 0755;
+       const int       uid = 0;
+       const int       gid = 0;
+       xfs_bmap_free_t flist;
+       xfs_fsblock_t   first;
+       xfs_trans_t     *tp;
+       xfs_inode_t     *ip;
+       xfs_inode_t     *pip;
+       xfs_ino_t       ino;
+       int             committed;
+       int             error;
+       int             nres;
+
+       tp = libxfs_trans_alloc(mp, 0);
+       XFS_BMAP_INIT(&flist, &first);
+
+       nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE));
+       error = libxfs_trans_reserve(tp, nres, XFS_MKDIR_LOG_RES(mp), 0,
+                               XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
+       if (error)
+               res_failed(error);
+
+       /*
+        * use iget/ijoin instead of trans_iget because the ialloc
+        * wrapper can commit the transaction and start a new one
+        */
+       error = libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip, 0);
+       if (error)
+               do_error("%d - couldn't iget root inode to make %s\n",
+                       error, ORPHANAGE);
+
+       error = libxfs_inode_alloc(&tp, pip, mode|IFDIR,
+                                       1, mp->m_dev, &zerocr, &ip);
+       if (error) {
+               do_error("%s inode allocation failed %d\n",
+                       ORPHANAGE, error);
+       }
+
+       ip->i_d.di_uid = uid;
+       ip->i_d.di_gid = gid;
+       ip->i_d.di_nlink++;             /* account for . */
+
+       /*
+        * now that we know the transaction will stay around,
+        * add the root inode to it
+        */
+       libxfs_trans_ijoin(tp, pip, 0);
+
+       /*
+        * create the actual entry
+        */
+       error = dir_createname(mp, tp, pip, ORPHANAGE,
+                       strlen(ORPHANAGE), ip->i_ino, &first, &flist, nres);
+       if (!error) {
+               orphanage_entered = 1;
+       } else {
+               do_warn("can't make %s, createname error %d, will try later\n",
+                       ORPHANAGE, error);
+               orphanage_entered = 0;
+       }
+
+       /*
+        * bump up the link count in the root directory to account
+        * for .. in the new directory
+        */
+       pip->i_d.di_nlink++;
+
+       libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+       dir_init(mp, tp, ip, pip);
+       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (error) {
+               do_error("%s directory creation failed -- bmapf error %d\n",
+                       ORPHANAGE, error);
+       }
+
+       /* remember the number before the transaction is committed */
+       ino = ip->i_ino;
+
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+       /* need libxfs_iput here? - nathans TODO - possible memory leak? */
+
+       return ino;
+}
+
+/*
+ * move a file to the orphanage.  the orphanage is guaranteed
+ * at this point to only have files in it whose name == file inode #
+ *
+ * The new entry is named after the decimal inode number.  A
+ * non-directory gets its link count set to 1.  A directory bumps the
+ * link count of the orphanage and has its ".." entry created (when the
+ * lookup of ".." fails) or replaced (when it exists but does not
+ * already point at the orphanage).  Every failure goes to do_error().
+ */
+void
+mv_orphanage(xfs_mount_t       *mp,
+               xfs_ino_t       dir_ino,        /* orphanage inode # */
+               xfs_ino_t       ino,            /* inode # to be moved */
+               int             isa_dir)        /* 1 if inode is a directory */
+{
+       char            fname[MAXPATHLEN + 1];
+       xfs_bmap_free_t flist;
+       xfs_fsblock_t   first;
+       xfs_ino_t       dotdot_ino;
+       xfs_inode_t     *orphdir;
+       xfs_inode_t     *child;
+       xfs_trans_t     *tp;
+       int             committed;
+       int             err;
+       int             nres;
+
+       sprintf(fname, "%llu", ino);
+
+       err = libxfs_iget(mp, NULL, dir_ino, 0, &orphdir, 0);
+       if (err)
+               do_error("%d - couldn't iget orphanage inode\n", err);
+
+       tp = libxfs_trans_alloc(mp, 0);
+
+       err = libxfs_iget(mp, NULL, ino, 0, &child, 0);
+       if (err)
+               do_error("%d - couldn't iget disconnected inode\n", err);
+
+       if (!isa_dir)  {
+               /*
+                * use the remove log reservation as that's
+                * more accurate.  we're only creating the
+                * links, we're not doing the inode allocation
+                * also accounted for in the create
+                */
+               nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname));
+               err = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0,
+                               XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+               if (err)
+                       do_error(
+       "space reservation failed (%d), filesystem may be out of space\n",
+                               err);
+
+               libxfs_trans_ijoin(tp, orphdir, 0);
+               libxfs_trans_ijoin(tp, child, 0);
+
+               XFS_BMAP_INIT(&flist, &first);
+               err = dir_createname(mp, tp, orphdir, fname,
+                               strlen(fname), ino, &first, &flist, nres);
+               if (err)
+                       do_error(
+       "name create failed in %s (%d), filesystem may be out of space\n",
+                               ORPHANAGE, err);
+               ASSERT(err == 0);
+
+               child->i_d.di_nlink = 1;
+               libxfs_trans_log_inode(tp, child, XFS_ILOG_CORE);
+
+               err = libxfs_bmap_finish(&tp, &flist, first, &committed);
+               if (err)
+                       do_error(
+               "bmap finish failed (%d), filesystem may be out of space\n",
+                               err);
+
+               libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+               return;
+       }
+
+       /*
+        * directories need room for the new name plus a ".." entry
+        */
+       nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname)) +
+              XFS_DIRENTER_SPACE_RES(mp, 2);
+
+       err = dir_lookup(mp, tp, child, "..", 2, &dotdot_ino);
+       if (err)  {
+               /*
+                * the directory has no ".." entry, so create one
+                */
+               ASSERT(err == ENOENT);
+
+               err = libxfs_trans_reserve(tp, nres,
+                               XFS_RENAME_LOG_RES(mp), 0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_RENAME_LOG_COUNT);
+               if (err)
+                       do_error(
+               "space reservation failed (%d), filesystem may be out of space\n",
+                               err);
+
+               libxfs_trans_ijoin(tp, orphdir, 0);
+               libxfs_trans_ijoin(tp, child, 0);
+
+               XFS_BMAP_INIT(&flist, &first);
+               err = dir_createname(mp, tp, orphdir, fname,
+                               strlen(fname), ino, &first, &flist, nres);
+               if (err)
+                       do_error(
+       "name create failed in %s (%d), filesystem may be out of space\n",
+                               ORPHANAGE, err);
+
+               orphdir->i_d.di_nlink++;
+               libxfs_trans_log_inode(tp, orphdir, XFS_ILOG_CORE);
+
+               err = dir_createname(mp, tp, child, "..", 2,
+                               dir_ino, &first, &flist, nres);
+               if (err)
+                       do_error(
+       "creation of .. entry failed (%d), filesystem may be out of space\n",
+                               err);
+
+               child->i_d.di_nlink++;
+               libxfs_trans_log_inode(tp, child, XFS_ILOG_CORE);
+
+               err = libxfs_bmap_finish(&tp, &flist, first, &committed);
+               if (err)
+                       do_error(
+       "bmap finish failed (err - %d), filesystem may be out of space\n",
+                               err);
+
+               libxfs_trans_commit(tp,
+                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+               return;
+       }
+
+       /*
+        * the directory already has a ".." entry
+        */
+       err = libxfs_trans_reserve(tp, nres,
+                       XFS_RENAME_LOG_RES(mp), 0,
+                       XFS_TRANS_PERM_LOG_RES,
+                       XFS_RENAME_LOG_COUNT);
+       if (err)
+               do_error(
+       "space reservation failed (%d), filesystem may be out of space\n",
+                       err);
+
+       libxfs_trans_ijoin(tp, orphdir, 0);
+       libxfs_trans_ijoin(tp, child, 0);
+
+       XFS_BMAP_INIT(&flist, &first);
+
+       err = dir_createname(mp, tp, orphdir, fname,
+                       strlen(fname), ino, &first, &flist, nres);
+       if (err)
+               do_error(
+       "name create failed in %s (%d), filesystem may be out of space\n",
+                       ORPHANAGE, err);
+
+       orphdir->i_d.di_nlink++;
+       libxfs_trans_log_inode(tp, orphdir, XFS_ILOG_CORE);
+
+       /*
+        * don't replace .. value if it already points
+        * to us.  that'll pop a libxfs/kernel ASSERT.
+        */
+       if (dotdot_ino != dir_ino)  {
+               err = dir_replace(mp, tp, child, "..", 2, dir_ino,
+                               &first, &flist, nres);
+               if (err)
+                       do_error(
+               "name replace op failed (%d), filesystem may be out of space\n",
+                               err);
+       }
+
+       err = libxfs_bmap_finish(&tp, &flist, first, &committed);
+       if (err)
+               do_error(
+               "bmap finish failed (%d), filesystem may be out of space\n",
+                       err);
+
+       libxfs_trans_commit(tp,
+               XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+}
+
+/*
+ * Locate the leftmost leaf block of a directory using the libxfs
+ * simulation code (libxfs_bmapi/libxfs_readbuf) instead of raw I/O;
+ * compare get_first_dblock_fsbno.
+ *
+ * Directory block 0 is mapped first.  If the directory is no larger
+ * than XFS_LBSIZE(mp), or the superblock says version 2 directories
+ * are in use, the fsbno of block 0 is returned and *bno stays 0.
+ * Otherwise the btree is walked down through the first entry of each
+ * node; the level recorded in the first node read bounds the walk.
+ *
+ * Returns the fsbno of the first (leftmost) block in the directory leaf
+ * and sets *bno to the directory block # corresponding to that fsbno.
+ *
+ * A mapping failure or a hole is handed to do_error unless we are in
+ * no-modify mode, where it is reported with do_warn and NULLDFSBNO is
+ * returned.  An unreadable block or a bad node magic number is always
+ * reported with do_warn and answered with NULLDFSBNO.
+ */
+xfs_dfsbno_t
+map_first_dblock_fsbno(xfs_mount_t     *mp,
+                       xfs_ino_t       ino,
+                       xfs_inode_t     *ip,
+                       xfs_dablk_t     *bno)
+{
+       char                    *ftype = "dir";
+       xfs_fsblock_t           firstblock = NULLFSBLOCK;
+       xfs_dablk_t             da_bno = 0;
+       xfs_dfsbno_t            fsbno;
+       xfs_bmbt_irec_t         irec;
+       int                     nirecs;
+       int                     depth = -1;
+       int                     rc;
+
+       *bno = 0;
+
+       /*
+        * start with directory block 0
+        */
+       nirecs = 1;
+       rc = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1,
+                       XFS_BMAPI_METADATA, &firstblock, 0,
+                       &irec, &nirecs, NULL);
+       if (rc || nirecs != 1)  {
+               if (no_modify)  {
+                       do_warn(
+"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n",
+                               da_bno, ftype, ino, rc, nirecs);
+                       return(NULLDFSBNO);
+               }
+               do_error(
+"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n",
+                       da_bno, ftype, ino, rc, nirecs);
+       }
+
+       fsbno = irec.br_startblock;
+       if (fsbno == HOLESTARTBLOCK)  {
+               if (no_modify)  {
+                       do_warn("block %d in %s ino %llu doesn't exist\n",
+                               da_bno, ftype, ino);
+                       return(NULLDFSBNO);
+               }
+               do_error("block %d in %s ino %llu doesn't exist\n",
+                       da_bno, ftype, ino);
+       }
+
+       /*
+        * small directories and version 2 directories: block 0 is
+        * the answer, there is nothing to descend through.
+        */
+       if (ip->i_d.di_size <= XFS_LBSIZE(mp) ||
+           XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+               return(fsbno);
+
+       /*
+        * walk down the left side of the btree, releasing each
+        * buffer as soon as the next block number has been pulled
+        * out of it.
+        */
+       for (;;)  {
+               xfs_da_intnode_t        *node;
+               xfs_buf_t               *buf;
+
+               buf = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+               if (buf == NULL)  {
+                       do_warn(
+               "can't read block %u (fsbno %llu) for directory inode %llu\n",
+                                       da_bno, fsbno, ino);
+                       return(NULLDFSBNO);
+               }
+
+               node = (xfs_da_intnode_t *)XFS_BUF_PTR(buf);
+               if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)  {
+                       libxfs_putbuf(buf);
+                       do_warn(
+"bad dir/attr magic number in inode %llu, file bno = %u, fsbno = %llu\n",
+                               ino, da_bno, fsbno);
+                       return(NULLDFSBNO);
+               }
+
+               /*
+                * the first node we see tells us how many
+                * levels there are to go
+                */
+               if (depth == -1)
+                       depth = INT_GET(node->hdr.level, ARCH_CONVERT);
+
+               da_bno = INT_GET(node->btree[0].before, ARCH_CONVERT);
+               libxfs_putbuf(buf);
+
+               /*
+                * translate the child's directory block # to an fsbno
+                */
+               nirecs = 1;
+               rc = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1,
+                               XFS_BMAPI_METADATA, &firstblock, 0,
+                               &irec, &nirecs, NULL);
+               if (rc || nirecs != 1)  {
+                       if (no_modify)  {
+                               do_warn(
+       "can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n",
+                                       da_bno, ftype, ino, rc, nirecs);
+                               return(NULLDFSBNO);
+                       }
+                       do_error(
+       "can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n",
+                               da_bno, ftype, ino, rc, nirecs);
+               }
+
+               fsbno = irec.br_startblock;
+               if (fsbno == HOLESTARTBLOCK)  {
+                       if (no_modify)  {
+                               do_warn(
+                               "block %d in %s inode %llu doesn't exist\n",
+                                       da_bno, ftype, ino);
+                               return(NULLDFSBNO);
+                       }
+                       do_error(
+                               "block %d in %s inode %llu doesn't exist\n",
+                               da_bno, ftype, ino);
+               }
+
+               depth--;
+               if (depth <= 0)
+                       break;
+       }
+
+       *bno = da_bno;
+       return(fsbno);
+}
+
+/*
+ * scan longform directory and prune first bad entry.  returns 1 if
+ * it had to remove something, 0 if it made it all the way through
+ * the directory.  prune_lf_dir_entry does all the necessary bmap calls.
+ *
+ * A "bad" entry is one whose name starts with '/'.
+ *
+ * mp      -- mount structure
+ * ino     -- inode number of the directory (used in messages)
+ * ip      -- incore inode of the directory
+ * hashval -- an in/out -- starting hashvalue in, hashvalue of the
+ *                     deleted entry (if there was one) out
+ *
+ * Any failure (cannot locate or read a block, cannot map a block,
+ * removal or bmap finish fails) is fatal via do_error/res_failed.
+ *
+ * this routine can NOT be called if running in no modify mode
+ */
+int
+prune_lf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip,
+                       xfs_dahash_t *hashval)
+{
+       xfs_dfsbno_t            fsbno;
+       int                     i;
+       int                     index;
+       int                     error;
+       int                     namelen;
+       xfs_bmap_free_t         free_list;
+       xfs_fsblock_t           first_block;
+       xfs_buf_t               *bp;
+       xfs_dir_leaf_name_t     *namest;
+       xfs_dir_leafblock_t     *leaf;
+       xfs_dir_leaf_entry_t    *entry;
+       xfs_trans_t             *tp;
+       xfs_dablk_t             da_bno;
+       xfs_fsblock_t           fblock;
+       int                     committed;
+       int                     nmap;
+       xfs_bmbt_irec_t         map;
+       char                    fname[MAXNAMELEN + 1];
+       char                    *ftype;
+       int                     nres;
+
+       /*
+        * ok, this is kind of a schizoid routine.  we use our
+        * internal bmapi routines to walk the directory.  when
+        * we find a bogus entry, we release the buffer so
+        * the simulation code doesn't deadlock and use the
+        * sim code to remove the entry.  That will cause an
+        * extra bmap traversal to map the block but I think
+        * that's preferable to hacking the bogus removename
+        * function to be really different and then trying to
+        * maintain both versions as time goes on.
+        *
+        * first, grab the dinode and find the right leaf block.
+        */
+
+       ftype = "dir";
+       da_bno = 0;
+       bp = NULL;
+       namest = NULL;
+       fblock = NULLFSBLOCK;
+
+       fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno);
+
+       /*
+        * map_first_dblock_fsbno only warns and hands back NULLDFSBNO
+        * when it can't read a block or finds a bad magic number.
+        * we can't walk the leaves without a starting block, so
+        * don't feed that sentinel to the buffer code -- give up.
+        */
+       if (fsbno == NULLDFSBNO)  {
+               do_error("cannot map block 0 of directory inode %llu\n", ino);
+               /* NOTREACHED */
+       }
+
+       /*
+        * now go forward along the leaves of the btree looking
+        * for an entry beginning with '/'
+        */
+       do {
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+
+               if (!bp)  {
+                       do_error(
+       "can't read directory inode %llu (leaf) block %u (fsbno %llu)\n",
+                               ino, da_bno, fsbno);
+                       /* NOTREACHED */
+               }
+
+               leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+               ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+               entry = &leaf->entries[0];
+
+               /*
+                * entry is only advanced past entries that are ok, so
+                * when the scan stops on a hit, entry points at the
+                * index'th entry.
+                */
+               for (index = -1, i = 0;
+                               i < INT_GET(leaf->hdr.count, ARCH_CONVERT) && index == -1;
+                               i++)  {
+                       namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+                       if (namest->name[0] != '/')
+                               entry++;
+                       else
+                               index = i;
+               }
+
+               /*
+                * if we got a bogus entry, exit loop with a pointer to
+                * the leaf block buffer.  otherwise, keep trying blocks
+                */
+               da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+               if (index == -1)  {
+                       if (bp != NULL)  {
+                               libxfs_putbuf(bp);
+                               bp = NULL;
+                       }
+
+                       /*
+                        * map next leaf block unless we've run out
+                        */
+                       if (da_bno != 0)  {
+                               nmap = 1;
+                               error = libxfs_bmapi(NULL, ip,
+                                               (xfs_fileoff_t) da_bno, 1,
+                                               XFS_BMAPI_METADATA, &fblock, 0,
+                                               &map, &nmap, NULL);
+                               if (error || nmap != 1)
+                                       do_error(
+"can't map block %d in directory %llu, xfs_bmapi returns %d, nmap = %d\n",
+                                               da_bno, ino, error, nmap);
+                               if ((fsbno = map.br_startblock)
+                                               == HOLESTARTBLOCK)  {
+                                       do_error(
+                               "%s ino %llu block %d doesn't exist\n",
+                                               ftype, ino, da_bno);
+                               }
+                       }
+               }
+       } while (da_bno != 0 && index == -1);
+
+       /*
+        * if we hit the edge of the tree with no bad entries, we're done
+        * and the buffer was released.
+        */
+       if (da_bno == 0 && index == -1)
+               return(0);
+
+       ASSERT(index >= 0);
+       ASSERT(entry == &leaf->entries[index]);
+       ASSERT(namest == XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)));
+
+       /*
+        * snag the info we need out of the directory then release all buffers
+        */
+       bcopy(namest->name, fname, entry->namelen);
+       fname[entry->namelen] = '\0';
+       *hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+       namelen = entry->namelen;
+
+       libxfs_putbuf(bp);
+
+       /*
+        * ok, now the hard part, blow away the index'th entry in this block
+        *
+        * allocate a remove transaction for it.  that's not quite true since
+        * we're only messing with one inode, not two but...
+        */
+
+       tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+
+       nres = XFS_REMOVE_SPACE_RES(mp);
+       error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp),
+                                   0, XFS_TRANS_PERM_LOG_RES,
+                                   XFS_REMOVE_LOG_COUNT);
+       if (error)
+               res_failed(error);
+
+       libxfs_trans_ijoin(tp, ip, 0);
+       libxfs_trans_ihold(tp, ip);
+
+       XFS_BMAP_INIT(&free_list, &first_block);
+
+       error = dir_bogus_removename(mp, tp, ip, fname,
+               &first_block, &free_list, nres, *hashval, namelen);
+
+       if (error)  {
+               do_error(
+"couldn't remove bogus entry \"%s\" in\n\tdirectory inode %llu, errno = %d\n",
+                       fname, ino, error);
+               /* NOTREACHED */
+       }
+
+       /*
+        * an ASSERT alone would let a failure slip through silently
+        * when assertions are compiled out, so report it the same
+        * way the other bmap_finish callers in this file do.
+        */
+       error = libxfs_bmap_finish(&tp, &free_list, first_block, &committed);
+       if (error)  {
+               do_error(
+               "bmap finish failed (%d), filesystem may be out of space\n",
+                       error);
+               /* NOTREACHED */
+       }
+
+       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0);
+
+       return(1);
+}
+
+/*
+ * process the entries of one (version 1) directory leaf block.
+ *
+ * ".." entries are skipped here (see the comment in the loop); the
+ * entry pointing back at the directory itself is counted as "." and
+ * clears *need_dot.  every other entry is checked against the incore
+ * inode tree and, when it has to go, flagged by overwriting the first
+ * byte of its name with '/' (only when not in no-modify mode).
+ *
+ * mp                 -- mount structure
+ * ino                -- inode number of the directory being checked
+ * leaf               -- the leaf block to scan (modified in place)
+ * dirty              -- out: reset to 0 on entry, set to 1 if any
+ *                       entry in the block was flagged
+ * num_illegal        -- in/out: incremented by the number of bad
+ *                       entries seen in this block (including ones
+ *                       that already started with '/')
+ * need_dot           -- out: set to 0 once the "." entry is seen,
+ *                       otherwise left untouched
+ * stack              -- directory stack that child directories
+ *                       are pushed onto
+ * current_irec,
+ * current_ino_offset -- incore inode record/offset handed to
+ *                       add_inode_ref for the directory itself
+ */
+void
+lf_block_dir_entry_check(xfs_mount_t           *mp,
+                       xfs_ino_t               ino,
+                       xfs_dir_leafblock_t     *leaf,
+                       int                     *dirty,
+                       int                     *num_illegal,
+                       int                     *need_dot,
+                       dir_stack_t             *stack,
+                       ino_tree_node_t         *current_irec,
+                       int                     current_ino_offset)
+{
+       xfs_dir_leaf_entry_t    *entry;
+       ino_tree_node_t         *irec;
+       xfs_ino_t               lino;
+       xfs_ino_t               parent;
+       xfs_dir_leaf_name_t     *namest;
+       int                     i;
+       int                     junkit;
+       int                     ino_offset;
+       int                     nbad;
+       char                    fname[MAXNAMELEN + 1];
+
+       entry = &leaf->entries[0];
+       *dirty = 0;
+       nbad = 0;       /* bad entries seen in this block only */
+
+       /*
+        * look at each entry.  reference inode pointed to by each
+        * entry in the incore inode tree.
+        * if not a directory, set reached flag, increment link count
+        * if a directory and reached, mark entry as to be deleted.
+        * if a directory, check to see if recorded parent
+        *      matches current inode #,
+        *      if so, then set reached flag, increment link count
+        *              of current and child dir inodes, push the child
+        *              directory inode onto the directory stack.
+        *      if current inode != parent, then mark entry to be deleted.
+        *
+        * nothing is returned; results go out through *dirty,
+        * *num_illegal and *need_dot.
+        */
+       for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++)  {
+               /*
+                * snag inode #, update link counts, and make sure
+                * this isn't a loop if the child is a directory
+                */
+               namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+
+               /*
+                * skip bogus entries (leading '/').  they'll be deleted
+                * later
+                */
+               if (namest->name[0] == '/')  {
+                       nbad++; /* still counted toward *num_illegal */
+                       continue;
+               }
+
+               junkit = 0;
+
+               XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+               bcopy(namest->name, fname, entry->namelen);
+               fname[entry->namelen] = '\0';   /* on-disk names aren't terminated */
+
+               ASSERT(lino != NULLFSINO);
+
+               /*
+                * skip the '..' entry since it's checked when the
+                * directory is reached by something else.  if it never
+                * gets reached, it'll be moved to the orphanage and we'll
+                * take care of it then.
+                */
+               if (entry->namelen == 2 && namest->name[0] == '.' &&
+                               namest->name[1] == '.')  {
+                       continue;
+               }
+               ASSERT(no_modify || !verify_inum(mp, lino));
+
+               /*
+                * special case the . entry.  we know there's only one
+                * '.' and only '.' points to itself because bogus entries
+                * got trashed in phase 3 if there were > 1.
+                * bump up link count for '.' but don't set reached
+                * until we're actually reached by another directory
+                * '..' is already accounted for or will be taken care
+                * of when directory is moved to orphanage.
+                */
+               if (ino == lino)  {
+                       ASSERT(namest->name[0] == '.' && entry->namelen == 1);
+                       add_inode_ref(current_irec, current_ino_offset);
+                       *need_dot = 0;
+                       continue;
+               }
+
+               /*
+                * special case the "lost+found" entry if pointing
+                * to where we think lost+found should be.  if that's
+                * the case, that's the one we created in phase 6.
+                * just skip it.  no need to process it and its ..
+                * link is already accounted for.
+                */
+
+               if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0)
+                       continue;
+
+               /*
+                * skip entries with bogus inumbers if we're in no modify mode
+                */
+               if (no_modify && verify_inum(mp, lino))
+                       continue;
+
+               /*
+                * ok, now handle the rest of the cases besides '.' and '..'
+                */
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+                                       XFS_INO_TO_AGINO(mp, lino));
+               
+               if (irec == NULL)  {
+                       nbad++;
+                       do_warn(
+       "entry \"%s\" in dir inode %llu points to non-existent inode, ",
+                               fname, ino);
+
+                       if (!no_modify)  {
+                               namest->name[0] = '/';  /* flag entry in the block itself */
+                               *dirty = 1;
+                               do_warn("marking entry to be junked\n");
+                       } else  {
+                               do_warn("would junk entry\n");
+                       }
+
+                       continue;
+               }
+
+               /* position of lino within its incore inode record */
+               ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+               /*
+                * if it's a free inode, blow out the entry.
+                * by now, any inode that we think is free
+                * really is free.
+                */
+               if (is_inode_free(irec, ino_offset))  {
+                       /*
+                        * don't complain if this entry points to the old
+                        * and now-free lost+found inode
+                        */
+                       if (verbose || no_modify || lino != old_orphanage_ino)
+                               do_warn(
+               "entry \"%s\" in dir inode %llu points to free inode %llu",
+                                       fname, ino, lino);
+                       nbad++;
+
+                       if (!no_modify)  {
+                               if (verbose || lino != old_orphanage_ino)
+                                       do_warn(", marking entry to be junked\n");
+
+                               else
+                                       do_warn("\n");
+                               namest->name[0] = '/';
+                               *dirty = 1;
+                       } else  {
+                               do_warn(", would junk entry\n");
+                       }
+
+                       continue;
+               }
+
+               /*
+                * check easy case first, regular inode, just bump
+                * the link count and continue
+                */
+               if (!inode_isadir(irec, ino_offset))  {
+                       add_inode_reached(irec, ino_offset);
+                       continue;
+               }
+
+               /* from here on the entry points at a directory */
+               parent = get_inode_parent(irec, ino_offset);
+               ASSERT(parent != 0);
+
+               /*
+                * bump up the link counts in parent and child
+                * directory but if the link doesn't agree with
+                * the .. in the child, blow out the entry.
+                * if the directory has already been reached,
+                * blow away the entry also.
+                */
+               if (is_inode_reached(irec, ino_offset))  {
+                       junkit = 1;
+                       do_warn(
+"entry \"%s\" in dir %llu points to an already connected dir inode %llu,\n",
+                               fname, ino, lino);
+               } else if (parent == ino)  {
+                       add_inode_reached(irec, ino_offset);
+                       add_inode_ref(current_irec, current_ino_offset);
+
+                       if (!is_inode_refchecked(lino, irec, ino_offset))
+                               push_dir(stack, lino);
+               } else  {
+                       junkit = 1;
+                       do_warn(
+"entry \"%s\" in dir ino %llu not consistent with .. value (%llu) in ino %llu,\n",
+                               fname, ino, parent, lino);
+               }
+
+               if (junkit)  {
+                       junkit = 0;
+                       nbad++;
+
+                       if (!no_modify)  {
+                               namest->name[0] = '/';
+                               *dirty = 1;
+                               if (verbose || lino != old_orphanage_ino)
+                                       do_warn("\twill clear entry \"%s\"\n",
+                                               fname);
+                       } else  {
+                               do_warn("\twould clear entry \"%s\"\n", fname);
+                       }
+               }
+       }
+
+       *num_illegal += nbad;   /* accumulate, the caller's total spans blocks */
+}
+
+/*
+ * check every leaf block of a longform (version 1) directory by
+ * running lf_block_dir_entry_check over each one, following the
+ * forward pointers from the leftmost leaf.
+ *
+ * succeeds or dies, inode never gets dirtied since all changes
+ * happen in file blocks.  the inode size and other core info
+ * is already correct, it's just the leaf entries that get altered.
+ * (in no-modify mode, mapping problems are only warned about and
+ * the scan is abandoned instead.)
+ *
+ * mp               -- mount structure
+ * ino              -- inode number of the directory
+ * ip               -- incore inode of the directory
+ * num_illegal      -- in/out: bumped by the number of bad entries
+ * need_dot         -- out: set to 1 here, cleared by the block
+ *                     checker when "." is found
+ * stack            -- directory stack, passed through
+ * irec, ino_offset -- incore inode record/offset of the directory,
+ *                     passed through
+ */
+void
+longform_dir_entry_check(xfs_mount_t   *mp,
+                       xfs_ino_t       ino,
+                       xfs_inode_t     *ip,
+                       int             *num_illegal,
+                       int             *need_dot,
+                       dir_stack_t     *stack,
+                       ino_tree_node_t *irec,
+                       int             ino_offset)
+{
+       xfs_dir_leafblock_t     *leaf;
+       xfs_buf_t               *bp;
+       xfs_dfsbno_t            fsbno;
+       xfs_fsblock_t           fblock;
+       xfs_dablk_t             da_bno;
+       xfs_dablk_t             cur_bno;
+       int                     dirty;
+       int                     nmap;
+       int                     error;
+       int                     skipit;
+       xfs_bmbt_irec_t         map;
+       char                    *ftype;
+
+       da_bno = 0;
+       fblock = NULLFSBLOCK;
+       *need_dot = 1;
+       ftype = "dir";
+
+       fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno);
+
+       /*
+        * map_first_dblock_fsbno can come back empty handed even
+        * when we're allowed to modify (unreadable block, bad magic).
+        * without a first leaf there is nothing to check: warn and
+        * bail in no-modify mode, die otherwise.
+        */
+       if (fsbno == NULLDFSBNO)  {
+               if (no_modify)  {
+                       do_warn("cannot map block 0 of directory inode %llu\n", ino);
+                       return;
+               }
+               do_error("cannot map block 0 of directory inode %llu\n", ino);
+               /* NOTREACHED */
+       }
+
+       do {
+               ASSERT(fsbno != NULLDFSBNO);
+               skipit = 0;
+               dirty = 0;
+
+               /*
+                * da_bno is about to be replaced by the forward
+                * pointer, hang on to the block we're looking at
+                * so messages name the right one.
+                */
+               cur_bno = da_bno;
+
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                               XFS_FSB_TO_BB(mp, 1), 0);
+
+               if (!bp) {
+                       do_error(
+               "can't read block %u (fsbno %llu) for directory inode %llu\n",
+                                       cur_bno, fsbno, ino);
+                       /* NOTREACHED */
+               }
+
+               leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp);
+
+               da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+               if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)  {
+                       if (!no_modify)  {
+                               do_error(
+       "bad magic # (0x%x) for dir ino %llu leaf block (bno %u fsbno %llu)\n",
+                                       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT),
+                                       ino, cur_bno, fsbno);
+                               /* NOTREACHED */
+                       } else  {
+                               /*
+                                * this block's bad but maybe the
+                                * forward pointer is good...
+                                */
+                               skipit = 1;
+                       }
+               }
+
+               if (!skipit)
+                       lf_block_dir_entry_check(mp, ino, leaf, &dirty,
+                                               num_illegal, need_dot, stack,
+                                               irec, ino_offset);
+
+               ASSERT(dirty == 0 || (dirty && !no_modify));
+
+               /*
+                * only write the block back if an entry was flagged
+                */
+               if (dirty && !no_modify)
+                       libxfs_writebuf(bp, 0);
+               else
+                       libxfs_putbuf(bp);
+               bp = NULL;
+
+               /*
+                * map the next leaf block unless we've run out
+                */
+               if (da_bno != 0)  {
+                       nmap = 1;
+                       error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t)da_bno, 1,
+                                       XFS_BMAPI_METADATA, &fblock, 0,
+                                       &map, &nmap, NULL);
+                       if (error || nmap != 1)  {
+                               if (!no_modify)
+                                       do_error(
+"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n",
+                                               da_bno, ino, error, nmap);
+                               else  {
+                                       do_warn(
+"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n",
+                                               da_bno, ino, error, nmap);
+                                       return;
+                               }
+                       }
+                       if ((fsbno = map.br_startblock) == HOLESTARTBLOCK)  {
+                               if (!no_modify)
+                                       do_error(
+                               "block %d in %s ino %llu doesn't exist\n",
+                                               da_bno, ftype, ino);
+                               else  {
+                                       do_warn(
+                               "block %d in %s ino %llu doesn't exist\n",
+                                               da_bno, ftype, ino);
+                                       return;
+                               }
+                       }
+               }
+       } while (da_bno != 0);
+}
+
+/*
+ * Kill a block in a version 2 inode.
+ * Makes its own transaction.
+ *
+ * mp     -- mount structure
+ * ip     -- incore inode the block belongs to
+ * da_bno -- block number (in da block units) of the block to remove
+ * bp     -- buffer for that block; it is joined to the transaction
+ *           and handed to the shrink routine
+ *
+ * Blocks in [m_dirleafblk, m_dirfreeblk) are removed with
+ * libxfs_da_shrink_inode, everything else goes through
+ * libxfs_dir2_shrink_inode.  Any failure is fatal.
+ */
+static void
+dir2_kill_block(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *ip,
+       xfs_dablk_t     da_bno,
+       xfs_dabuf_t     *bp)
+{
+       xfs_da_args_t   args;
+       int             committed;
+       int             error;
+       xfs_fsblock_t   firstblock;
+       xfs_bmap_free_t flist;
+       int             nres;
+       xfs_trans_t     *tp;
+
+       tp = libxfs_trans_alloc(mp, 0);
+       nres = XFS_REMOVE_SPACE_RES(mp);
+       error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0,
+                       XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+       if (error)
+               res_failed(error);
+       libxfs_trans_ijoin(tp, ip, 0);
+       libxfs_trans_ihold(tp, ip);
+       libxfs_da_bjoin(tp, bp);
+
+       /*
+        * only the fields the shrink routines get from us are set,
+        * the rest of args stays zeroed.
+        */
+       bzero(&args, sizeof(args));
+       XFS_BMAP_INIT(&flist, &firstblock);
+       args.dp = ip;
+       args.trans = tp;
+       args.firstblock = &firstblock;
+       args.flist = &flist;
+       args.whichfork = XFS_DATA_FORK;
+       if (da_bno >= mp->m_dirleafblk && da_bno < mp->m_dirfreeblk)
+               error = libxfs_da_shrink_inode(&args, da_bno, bp);
+       else
+               error = libxfs_dir2_shrink_inode(&args,
+                               XFS_DIR2_DA_TO_DB(mp, da_bno), bp);
+       if (error)
+               do_error("shrink_inode failed inode %llu block %u\n",
+                       ip->i_ino, da_bno);
+
+       /*
+        * don't drop a bmap_finish failure on the floor, report it
+        * the same way the other callers in this file do.
+        */
+       error = libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+       if (error)
+               do_error(
+               "bmap finish failed (%d), filesystem may be out of space\n",
+                       error);
+       libxfs_trans_commit(tp, 0, 0);
+}
+
+/*
+ * process a data block, also checks for .. entry
+ * and corrects it to match what we think .. should be
+ */
+static void
+longform_dir2_entry_check_data(
+       xfs_mount_t             *mp,
+       xfs_inode_t             *ip,
+       int                     *num_illegal,
+       int                     *need_dot,
+       dir_stack_t             *stack,
+       ino_tree_node_t         *current_irec,
+       int                     current_ino_offset,
+       xfs_dabuf_t             **bpp,
+       dir_hash_tab_t          *hashtab,
+       freetab_t               **freetabp,
+       xfs_dablk_t             da_bno,
+       int                     isblock)
+{
+       xfs_dir2_dataptr_t      addr;
+       xfs_dir2_leaf_entry_t   *blp;
+       xfs_dabuf_t             *bp;
+       xfs_dir2_block_tail_t   *btp;
+       int                     committed;
+       xfs_dir2_data_t         *d;
+       xfs_dir2_db_t           db;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_unused_t  *dup;
+       char                    *endptr;
+       int                     error;
+       xfs_fsblock_t           firstblock;
+       xfs_bmap_free_t         flist;
+       char                    fname[MAXNAMELEN + 1];
+       freetab_t               *freetab;
+       int                     i;
+       int                     ino_offset;
+       ino_tree_node_t         *irec;
+       int                     junkit;
+       int                     lastfree;
+       int                     len;
+       int                     nbad;
+       int                     needlog;
+       int                     needscan;
+       xfs_ino_t               parent;
+       char                    *ptr;
+       xfs_trans_t             *tp;
+       int                     wantmagic;
+
+       bp = *bpp;
+       d = bp->data;
+       ptr = (char *)d->u;
+       nbad = 0;
+       needscan = needlog = 0;
+       freetab = *freetabp;
+       if (isblock) {
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, d);
+               blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+               endptr = (char *)blp;
+               if (endptr > (char *)btp)
+                       endptr = (char *)btp;
+               wantmagic = XFS_DIR2_BLOCK_MAGIC;
+       } else {
+               endptr = (char *)d + mp->m_dirblksize;
+               wantmagic = XFS_DIR2_DATA_MAGIC;
+       }
+       db = XFS_DIR2_DA_TO_DB(mp, da_bno);
+       if (freetab->naents <= db) {
+               struct freetab_ent e;
+
+               *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1));
+               if (!freetab) {
+                       do_error(
+               "realloc failed in longform_dir2_entry_check_data (%u bytes)\n",
+                               FREETAB_SIZE(db + 1));
+                       exit(1);
+               }
+               e.v = NULLDATAOFF;
+               e.s = 0;
+               for (i = freetab->naents; i < db; i++)
+                       freetab->ents[i] = e;
+               freetab->naents = db + 1;
+       }
+       if (freetab->nents < db + 1)
+               freetab->nents = db + 1;
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+                           (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)))
+                               break;
+                       if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) != 
+                           (char *)dup - (char *)d)
+                               break;
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       if (ptr >= endptr)
+                               break;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr)
+                       break;
+               if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d)
+                       break;
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+       }
+       if (ptr != endptr) {
+               do_warn("corrupt block %u in directory inode %llu: ",
+                       da_bno, ip->i_ino);
+               if (!no_modify) {
+                       do_warn("junking block\n");
+                       dir2_kill_block(mp, ip, da_bno, bp);
+               } else {
+                       do_warn("would junk block\n");
+                       libxfs_da_brelse(NULL, bp);
+               }
+               freetab->ents[db].v = NULLDATAOFF;
+               *bpp = NULL;
+               return;
+       }
+       tp = libxfs_trans_alloc(mp, 0);
+       error = libxfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
+               XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+       if (error)
+               res_failed(error);
+       libxfs_trans_ijoin(tp, ip, 0);
+       libxfs_trans_ihold(tp, ip);
+       libxfs_da_bjoin(tp, bp);
+       if (isblock)
+               libxfs_da_bhold(tp, bp);
+       XFS_BMAP_INIT(&flist, &firstblock);
+       if (INT_GET(d->hdr.magic, ARCH_CONVERT) != wantmagic) {
+               do_warn("bad directory block magic # %#x for directory inode "
+                       "%llu block %d: ",
+                       INT_GET(d->hdr.magic, ARCH_CONVERT), ip->i_ino, da_bno);
+               if (!no_modify) {
+                       do_warn("fixing magic # to %#x\n", wantmagic);
+                       INT_SET(d->hdr.magic, ARCH_CONVERT, wantmagic);
+                       needlog = 1;
+               } else
+                       do_warn("would fix magic # to %#x\n", wantmagic);
+       }
+       lastfree = 0;
+       ptr = (char *)d->u;
+       /*
+        * look at each entry.  reference inode pointed to by each
+        * entry in the incore inode tree.
+        * if not a directory, set reached flag, increment link count
+        * if a directory and reached, mark entry as to be deleted.
+        * if a directory, check to see if recorded parent
+        *      matches current inode #,
+        *      if so, then set reached flag, increment link count
+        *              of current and child dir inodes, push the child
+        *              directory inode onto the directory stack.
+        *      if current inode != parent, then mark entry to be deleted.
+        */
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (lastfree) {
+                               do_warn("directory inode %llu block %u has "
+                                       "consecutive free entries: ",
+                                       ip->i_ino, da_bno);
+                               if (!no_modify) {
+                                       do_warn("joining together\n");
+                                       len = INT_GET(dup->length, ARCH_CONVERT);
+                                       libxfs_dir2_data_use_free(tp, bp, dup,
+                                               ptr - (char *)d, len, &needlog,
+                                               &needscan);
+                                       libxfs_dir2_data_make_free(tp, bp,
+                                               ptr - (char *)d, len, &needlog,
+                                               &needscan);
+                               } else
+                                       do_warn("would join together\n");
+                       }
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       lastfree = 1;
+                       continue;
+               }
+               addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db, ptr - (char *)d);
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               lastfree = 0;
+               dir_hash_add(hashtab,
+                       libxfs_da_hashname((char *)dep->name, dep->namelen),
+                       addr, dep->name[0] == '/');
+               /*
+                * skip bogus entries (leading '/').  they'll be deleted
+                * later
+                */
+               if (dep->name[0] == '/')  {
+                       nbad++;
+                       continue;
+               }
+               junkit = 0;
+               bcopy(dep->name, fname, dep->namelen);
+               fname[dep->namelen] = '\0';
+               ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != NULLFSINO);
+               /*
+                * skip the '..' entry since it's checked when the
+                * directory is reached by something else.  if it never
+                * gets reached, it'll be moved to the orphanage and we'll
+                * take care of it then.
+                */
+               if (dep->namelen == 2 && dep->name[0] == '.' &&
+                   dep->name[1] == '.')
+                       continue;
+               ASSERT(no_modify || !verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT)));
+               /*
+                * special case the . entry.  we know there's only one
+                * '.' and only '.' points to itself because bogus entries
+                * got trashed in phase 3 if there were > 1.
+                * bump up link count for '.' but don't set reached
+                * until we're actually reached by another directory
+                * '..' is already accounted for or will be taken care
+                * of when directory is moved to orphanage.
+                */
+               if (ip->i_ino == INT_GET(dep->inumber, ARCH_CONVERT))  {
+                       ASSERT(dep->name[0] == '.' && dep->namelen == 1);
+                       add_inode_ref(current_irec, current_ino_offset);
+                       *need_dot = 0;
+                       continue;
+               }
+               /*
+                * special case the "lost+found" entry if pointing
+                * to where we think lost+found should be.  if that's
+                * the case, that's the one we created in phase 6.
+                * just skip it.  no need to process it and it's ..
+                * link is already accounted for.
+                */
+               if (INT_GET(dep->inumber, ARCH_CONVERT) == orphanage_ino &&
+                   strcmp(fname, ORPHANAGE) == 0)
+                       continue;
+               /*
+                * skip entries with bogus inumbers if we're in no modify mode
+                */
+               if (no_modify && verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT)))
+                       continue;
+               /*
+                * ok, now handle the rest of the cases besides '.' and '..'
+                */
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)),
+                       XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)));
+               if (irec == NULL)  {
+                       nbad++;
+                       do_warn("entry \"%s\" in directory inode %llu points "
+                               "to non-existent inode, ",
+                               fname, ip->i_ino);
+                       if (!no_modify)  {
+                               dep->name[0] = '/';
+                               libxfs_dir2_data_log_entry(tp, bp, dep);
+                               do_warn("marking entry to be junked\n");
+                       } else  {
+                               do_warn("would junk entry\n");
+                       }
+                       continue;
+               }
+               ino_offset =
+                       XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) - irec->ino_startnum;
+               /*
+                * if it's a free inode, blow out the entry.
+                * by now, any inode that we think is free
+                * really is free.
+                */
+               if (is_inode_free(irec, ino_offset))  {
+                       /*
+                        * don't complain if this entry points to the old
+                        * and now-free lost+found inode
+                        */
+                       if (verbose || no_modify ||
+                           INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+                               do_warn("entry \"%s\" in directory inode %llu "
+                                       "points to free inode %llu",
+                                       fname, ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT));
+                       nbad++;
+                       if (!no_modify)  {
+                               if (verbose ||
+                                   INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+                                       do_warn(", marking entry to be "
+                                               "junked\n");
+                               else
+                                       do_warn("\n");
+                               dep->name[0] = '/';
+                               libxfs_dir2_data_log_entry(tp, bp, dep);
+                       } else  {
+                               do_warn(", would junk entry\n");
+                       }
+                       continue;
+               }
+               /*
+                * check easy case first, regular inode, just bump
+                * the link count and continue
+                */
+               if (!inode_isadir(irec, ino_offset))  {
+                       add_inode_reached(irec, ino_offset);
+                       continue;
+               }
+               parent = get_inode_parent(irec, ino_offset);
+               ASSERT(parent != 0);
+               /*
+                * bump up the link counts in parent and child
+                * directory but if the link doesn't agree with
+                * the .. in the child, blow out the entry.
+                * if the directory has already been reached,
+                * blow away the entry also.
+                */
+               if (is_inode_reached(irec, ino_offset))  {
+                       junkit = 1;
+                       do_warn("entry \"%s\" in dir %llu points to an already "
+                               "connected directory inode %llu,\n", fname,
+                               ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT));
+               } else if (parent == ip->i_ino)  {
+                       add_inode_reached(irec, ino_offset);
+                       add_inode_ref(current_irec, current_ino_offset);
+                       if (!is_inode_refchecked(INT_GET(dep->inumber, ARCH_CONVERT), irec,
+                                       ino_offset))
+                               push_dir(stack, INT_GET(dep->inumber, ARCH_CONVERT));
+               } else  {
+                       junkit = 1;
+                       do_warn("entry \"%s\" in directory inode %llu not "
+                               "consistent with .. value (%llu) in ino "
+                               "%llu,\n",
+                               fname, ip->i_ino, parent, INT_GET(dep->inumber, ARCH_CONVERT));
+               }
+               if (junkit)  {
+                       junkit = 0;
+                       nbad++;
+                       if (!no_modify)  {
+                               dep->name[0] = '/';
+                               libxfs_dir2_data_log_entry(tp, bp, dep);
+                               if (verbose ||
+                                   INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino)
+                                       do_warn("\twill clear entry \"%s\"\n",
+                                               fname);
+                       } else  {
+                               do_warn("\twould clear entry \"%s\"\n", fname);
+                       }
+               }
+       }
+       *num_illegal += nbad;
+       if (needscan)
+               libxfs_dir2_data_freescan(mp, d, &needlog, NULL);
+       if (needlog)
+               libxfs_dir2_data_log_header(tp, bp);
+       libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+       libxfs_trans_commit(tp, 0, 0);
+       freetab->ents[db].v = INT_GET(d->hdr.bestfree[0].length, ARCH_CONVERT);
+       freetab->ents[db].s = 0;
+}
+
+/*
+ * Validate the single leaf block of a leaf-form directory.
+ *
+ * The block at mp->m_dirleafblk is read and its header is sanity
+ * checked.  Its entries are then run through dir_hash_see_all() and
+ * dir_hash_check() against hashtab, and the bests array found via the
+ * leaf tail is compared slot by slot with freetab.  Each freetab slot
+ * that gets compared has its "s" flag set to 1.
+ *
+ * Returns 0 when every check passes and 1 as soon as one fails; the
+ * buffer is released on every return path.
+ */
+int
+longform_dir2_check_leaf(
+       xfs_mount_t             *mp,
+       xfs_inode_t             *ip,
+       dir_hash_tab_t          *hashtab,
+       freetab_t               *freetab)
+{
+       xfs_dir2_data_off_t     *bests;
+       xfs_dabuf_t             *bp;
+       xfs_dablk_t             da_bno;
+       int                     idx;
+       xfs_dir2_leaf_t         *leaf;
+       int                     rval;
+       int                     seeval;
+       xfs_dir2_leaf_tail_t    *tail;
+       int                     tailbad;
+
+       da_bno = mp->m_dirleafblk;
+       if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) {
+               do_error("can't read block %u for directory inode %llu\n",
+                       da_bno, ip->i_ino);
+               /* NOTREACHED */
+       }
+       leaf = bp->data;
+       tail = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+       bests = XFS_DIR2_LEAF_BESTS_P_ARCH(tail, ARCH_CONVERT);
+
+       /* assume failure until every check below has passed */
+       rval = 1;
+
+       /*
+        * header: right magic, no siblings, sane entry counts, and the
+        * entry array must not run into the bests array.
+        */
+       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC ||
+           INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) ||
+           INT_GET(leaf->hdr.info.back, ARCH_CONVERT) ||
+           INT_GET(leaf->hdr.count, ARCH_CONVERT) < INT_GET(leaf->hdr.stale, ARCH_CONVERT) ||
+           INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp) ||
+           (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] > (char *)bests) {
+               do_warn("leaf block %u for directory inode %llu bad header\n",
+                       da_bno, ip->i_ino);
+               goto out;
+       }
+
+       /* hash entries in the leaf must agree with what the data blocks held */
+       seeval = dir_hash_see_all(hashtab, leaf->ents,
+               INT_GET(leaf->hdr.count, ARCH_CONVERT),
+               INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+       if (dir_hash_check(hashtab, ip, seeval))
+               goto out;
+
+       /* tail: bests count and each bests value must match freetab */
+       tailbad = INT_GET(tail->bestcount, ARCH_CONVERT) != freetab->nents;
+       idx = 0;
+       while (!tailbad && idx < INT_GET(tail->bestcount, ARCH_CONVERT)) {
+               freetab->ents[idx].s = 1;
+               if (freetab->ents[idx].v != INT_GET(bests[idx], ARCH_CONVERT))
+                       tailbad = 1;
+               idx++;
+       }
+       if (tailbad) {
+               do_warn("leaf block %u for directory inode %llu bad tail\n",
+                       da_bno, ip->i_ino);
+       } else {
+               rval = 0;
+       }
+out:
+       libxfs_da_brelse(NULL, bp);
+       return rval;
+}
+
+/*
+ * Check contents of the node blocks (leaves)
+ * Looks for matching hash values for the data entries.
+ *
+ * Pass 1 walks the mapped blocks from mp->m_dirleafblk up to (but not
+ * including) mp->m_dirfreeblk.  Blocks with XFS_DA_NODE_MAGIC are
+ * skipped, blocks with XFS_DIR2_LEAFN_MAGIC have their header checked
+ * and their entries fed to dir_hash_see_all(); any other magic fails
+ * the check.  dir_hash_check() is then run on the result.
+ *
+ * Pass 2 walks the mapped blocks from mp->m_dirfreeblk onwards,
+ * validates each free block header and compares every bests[] slot
+ * with the matching freetab entry, flagging the freetab entry as seen
+ * (s = 1).  Finally every freetab entry below nents must have been
+ * seen.
+ *
+ * Returns 0 if everything is consistent, 1 otherwise.  Each buffer is
+ * released before returning or moving to the next block.
+ */
+int
+longform_dir2_check_node(
+       xfs_mount_t             *mp,
+       xfs_inode_t             *ip,
+       dir_hash_tab_t          *hashtab,
+       freetab_t               *freetab)
+{
+       xfs_dabuf_t             *bp;
+       xfs_dablk_t             da_bno;
+       xfs_dir2_db_t           fdb;
+       xfs_dir2_free_t         *free;
+       int                     i;
+       xfs_dir2_leaf_t         *leaf;
+       xfs_fileoff_t           next_da_bno;
+       int                     seeval = 0;
+       int                     used;
+
+       /*
+        * Pass 1: leaf (and interior node) blocks.
+        */
+       for (da_bno = mp->m_dirleafblk, next_da_bno = 0;
+            next_da_bno != NULLFILEOFF && da_bno < mp->m_dirfreeblk;
+            da_bno = (xfs_dablk_t)next_da_bno) {
+               /* find the next mapped offset past this directory block */
+               next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+               if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+                       break;
+               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+                               XFS_DATA_FORK)) {
+                       do_error("can't read block %u for directory inode "
+                                "%llu\n",
+                               da_bno, ip->i_ino);
+                       /* NOTREACHED */
+               }
+               leaf = bp->data;
+               if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) {
+                       /* interior btree nodes carry no hash entries to check */
+                       if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+                               libxfs_da_brelse(NULL, bp);
+                               continue;
+                       }
+                       do_warn("unknown magic number %#x for block %u in "
+                               "directory inode %llu\n",
+                               INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), da_bno, ip->i_ino);
+                       libxfs_da_brelse(NULL, bp);
+                       return 1;
+               }
+               if (INT_GET(leaf->hdr.count, ARCH_CONVERT) < INT_GET(leaf->hdr.stale, ARCH_CONVERT) ||
+                   INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+                       do_warn("leaf block %u for directory inode %llu bad "
+                               "header\n",
+                               da_bno, ip->i_ino);
+                       libxfs_da_brelse(NULL, bp);
+                       return 1;
+               }
+               seeval = dir_hash_see_all(hashtab, leaf->ents, INT_GET(leaf->hdr.count, ARCH_CONVERT),
+                       INT_GET(leaf->hdr.stale, ARCH_CONVERT));
+               libxfs_da_brelse(NULL, bp);
+               if (seeval != DIR_HASH_CK_OK)
+                       return 1;
+       }
+       if (dir_hash_check(hashtab, ip, seeval))
+               return 1;
+
+       /*
+        * Pass 2: free space index blocks.
+        */
+       for (da_bno = mp->m_dirfreeblk, next_da_bno = 0;
+            next_da_bno != NULLFILEOFF;
+            da_bno = (xfs_dablk_t)next_da_bno) {
+               next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+               if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+                       break;
+               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+                               XFS_DATA_FORK)) {
+                       do_error("can't read block %u for directory inode "
+                                "%llu\n",
+                               da_bno, ip->i_ino);
+                       /* NOTREACHED */
+               }
+               free = bp->data;
+               fdb = XFS_DIR2_DA_TO_DB(mp, da_bno);
+               /*
+                * nvalid must not exceed the number of bests slots a
+                * free block can hold; otherwise the loop below would
+                * read bests[] past the end of the buffer whenever
+                * freetab->nents is large enough to let it.
+                */
+               if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC ||
+                   INT_GET(free->hdr.firstdb, ARCH_CONVERT) !=
+                       (fdb - XFS_DIR2_FREE_FIRSTDB(mp)) *
+                       XFS_DIR2_MAX_FREE_BESTS(mp) ||
+                   INT_GET(free->hdr.nvalid, ARCH_CONVERT) < INT_GET(free->hdr.nused, ARCH_CONVERT) ||
+                   INT_GET(free->hdr.nvalid, ARCH_CONVERT) > XFS_DIR2_MAX_FREE_BESTS(mp)) {
+                       do_warn("free block %u for directory inode %llu bad "
+                               "header\n",
+                               da_bno, ip->i_ino);
+                       libxfs_da_brelse(NULL, bp);
+                       return 1;
+               }
+               /* each bests slot must match freetab; count the in-use ones */
+               for (i = used = 0; i < INT_GET(free->hdr.nvalid, ARCH_CONVERT); i++) {
+                       if (i + INT_GET(free->hdr.firstdb, ARCH_CONVERT) >= freetab->nents ||
+                           freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].v !=
+                           INT_GET(free->bests[i], ARCH_CONVERT)) {
+                               do_warn("free block %u entry %i for directory "
+                                       "ino %llu bad\n",
+                                       da_bno, i, ip->i_ino);
+                               libxfs_da_brelse(NULL, bp);
+                               return 1;
+                       }
+                       used += INT_GET(free->bests[i], ARCH_CONVERT) != NULLDATAOFF;
+                       freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].s = 1;
+               }
+               if (used != INT_GET(free->hdr.nused, ARCH_CONVERT)) {
+                       do_warn("free block %u for directory inode %llu bad "
+                               "nused\n",
+                               da_bno, ip->i_ino);
+                       libxfs_da_brelse(NULL, bp);
+                       return 1;
+               }
+               libxfs_da_brelse(NULL, bp);
+       }
+       /* every known data block must have been covered by some free block */
+       for (i = 0; i < freetab->nents; i++) {
+               if (freetab->ents[i].s == 0) {
+                       do_warn("missing freetab entry %u for directory inode "
+                               "%llu\n",
+                               i, ip->i_ino);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Rebuild a directory: set up.
+ * Turn it into a node-format directory with no contents in the
+ * upper area.  Also has correct freespace blocks.
+ *
+ * mp      - mount structure
+ * ino     - inode number, used only in error messages
+ * ip      - the directory inode being rebuilt
+ * freetab - per-data-block best-free values used to fill the new
+ *           free blocks
+ *
+ * First transaction: if the block at mp->m_dirdatablk is in block
+ * form, its magic is changed to the data-block magic and the area from
+ * the block's leaf entries to the end of the block is turned into free
+ * space; then one new da block is added and initialised as an empty
+ * LEAFN block.
+ * Then, one transaction per group of XFS_DIR2_MAX_FREE_BESTS(mp)
+ * freetab entries: a free block is added and its bests[] array, nvalid
+ * and nused are filled in from freetab.
+ *
+ * Failures to read or to add blocks are reported through do_error().
+ */
+void
+longform_dir2_rebuild_setup(
+       xfs_mount_t             *mp,
+       xfs_ino_t               ino,
+       xfs_inode_t             *ip,
+       freetab_t               *freetab)
+{
+       xfs_da_args_t           args;
+       int                     committed;
+       xfs_dir2_data_t         *data;
+       xfs_dabuf_t             *dbp;
+       int                     error;
+       xfs_dir2_db_t           fbno;
+       xfs_dabuf_t             *fbp;
+       xfs_fsblock_t           firstblock;
+       xfs_bmap_free_t         flist;
+       xfs_dir2_free_t         *free;
+       int                     i;
+       int                     j;
+       xfs_dablk_t             lblkno;
+       xfs_dabuf_t             *lbp;
+       xfs_dir2_leaf_t         *leaf;
+       int                     nres;
+       xfs_trans_t             *tp;
+
+       tp = libxfs_trans_alloc(mp, 0);
+       nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK);
+       error = libxfs_trans_reserve(tp,
+               nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+               XFS_CREATE_LOG_COUNT);
+       if (error)
+               res_failed(error);
+       libxfs_trans_ijoin(tp, ip, 0);
+       libxfs_trans_ihold(tp, ip);
+       XFS_BMAP_INIT(&flist, &firstblock);
+       /*
+        * NOTE(review): the -2 argument appears to let the read succeed
+        * with a NULL buffer when the block is not mapped; dbp is
+        * checked for NULL below for that reason.
+        */
+       if (libxfs_da_read_buf(tp, ip, mp->m_dirdatablk, -2, &dbp,
+                       XFS_DATA_FORK)) {
+               do_error("can't read block %u for directory inode %llu\n",
+                       mp->m_dirdatablk, ino);
+               /* NOTREACHED */
+       }
+       /*
+        * The magic number must be read through INT_GET like every other
+        * header field; a raw comparison never matches when ARCH_CONVERT
+        * implies a byte swap, and the block-form directory would then
+        * be left unconverted.  The assignment is kept out of the macro
+        * argument so it cannot be evaluated more than once.
+        */
+       data = dbp ? dbp->data : NULL;
+       if (data != NULL &&
+           INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               xfs_dir2_block_t        *block;
+               xfs_dir2_leaf_entry_t   *blp;
+               xfs_dir2_block_tail_t   *btp;
+               int                     needlog;
+               int                     needscan;
+
+               /* block form -> data form: retag, then free the leaf/tail area */
+               INT_SET(data->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+               block = (xfs_dir2_block_t *)data;
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+               blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+               needlog = needscan = 0;
+               libxfs_dir2_data_make_free(tp, dbp, (char *)blp - (char *)block,
+                       (char *)block + mp->m_dirblksize - (char *)blp,
+                       &needlog, &needscan);
+               if (needscan)
+                       libxfs_dir2_data_freescan(mp, data, &needlog, NULL);
+               /* whole block is logged, so needlog need not be consulted */
+               libxfs_da_log_buf(tp, dbp, 0, mp->m_dirblksize - 1);
+       }
+       /* add one block and make it an empty LEAFN block */
+       bzero(&args, sizeof(args));
+       args.trans = tp;
+       args.dp = ip;
+       args.whichfork = XFS_DATA_FORK;
+       args.firstblock = &firstblock;
+       args.flist = &flist;
+       args.total = nres;
+       if ((error = libxfs_da_grow_inode(&args, &lblkno)) ||
+           (error = libxfs_da_get_buf(tp, ip, lblkno, -1, &lbp, XFS_DATA_FORK))) {
+               do_error("can't add btree block to directory inode %llu\n",
+                       ino);
+               /* NOTREACHED */
+       }
+       leaf = lbp->data;
+       bzero(leaf, mp->m_dirblksize);
+       INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+       libxfs_da_log_buf(tp, lbp, 0, mp->m_dirblksize - 1);
+       libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+       libxfs_trans_commit(tp, 0, 0);
+
+       /*
+        * One free block, in its own transaction, per
+        * XFS_DIR2_MAX_FREE_BESTS(mp) freetab entries.
+        */
+       for (i = 0; i < freetab->nents; i += XFS_DIR2_MAX_FREE_BESTS(mp)) {
+               tp = libxfs_trans_alloc(mp, 0);
+               nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK);
+               error = libxfs_trans_reserve(tp,
+                       nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+                       XFS_CREATE_LOG_COUNT);
+               if (error)
+                       res_failed(error);
+               libxfs_trans_ijoin(tp, ip, 0);
+               libxfs_trans_ihold(tp, ip);
+               XFS_BMAP_INIT(&flist, &firstblock);
+               bzero(&args, sizeof(args));
+               args.trans = tp;
+               args.dp = ip;
+               args.whichfork = XFS_DATA_FORK;
+               args.firstblock = &firstblock;
+               args.flist = &flist;
+               args.total = nres;
+               if ((error = libxfs_dir2_grow_inode(&args, XFS_DIR2_FREE_SPACE,
+                                                &fbno)) ||
+                   (error = libxfs_da_get_buf(tp, ip, XFS_DIR2_DB_TO_DA(mp, fbno),
+                                           -1, &fbp, XFS_DATA_FORK))) {
+                       do_error("can't add free block to directory inode "
+                                "%llu\n",
+                               ino);
+                       /* NOTREACHED */
+               }
+               free = fbp->data;
+               bzero(free, mp->m_dirblksize);
+               INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+               INT_SET(free->hdr.firstdb, ARCH_CONVERT, i);
+               /* a full block's worth, trimmed for the last, partial group */
+               INT_SET(free->hdr.nvalid, ARCH_CONVERT, XFS_DIR2_MAX_FREE_BESTS(mp));
+               if (i + INT_GET(free->hdr.nvalid, ARCH_CONVERT) > freetab->nents)
+                       INT_SET(free->hdr.nvalid, ARCH_CONVERT, freetab->nents - i);
+               /* copy bests from freetab; nused counts the non-null ones */
+               for (j = 0; j < INT_GET(free->hdr.nvalid, ARCH_CONVERT); j++) {
+                       INT_SET(free->bests[j], ARCH_CONVERT, freetab->ents[i + j].v);
+                       if (INT_GET(free->bests[j], ARCH_CONVERT) != NULLDATAOFF)
+                               INT_MOD(free->hdr.nused, ARCH_CONVERT, +1);
+               }
+               libxfs_da_log_buf(tp, fbp, 0, mp->m_dirblksize - 1);
+               libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+               libxfs_trans_commit(tp, 0, 0);
+       }
+}
+
+/*
+ * Rebuild the entries from a single data block.
+ *
+ * mp     - mount structure
+ * ino    - inode number, used only in error messages
+ * ip     - the directory inode being rebuilt
+ * da_bno - da block number of the data block to process
+ *
+ * The data block and the free block that indexes it are read, and the
+ * data block's contents are copied to a malloc'ed scratch buffer.  In
+ * one transaction the on-disk data block is reset to a single free
+ * region after the header and the matching bests[] slot in the free
+ * block is updated.  Then the scratch copy is walked and every entry
+ * whose name does not start with '/' is re-added with dir_createname(),
+ * one transaction per entry.
+ */
+void
+longform_dir2_rebuild_data(
+       xfs_mount_t             *mp,
+       xfs_ino_t               ino,
+       xfs_inode_t             *ip,
+       xfs_dablk_t             da_bno)
+{
+       xfs_dabuf_t             *bp;
+       xfs_dir2_block_tail_t   *btp;
+       int                     committed;
+       xfs_dir2_data_t         *data;
+       xfs_dir2_db_t           dbno;
+       xfs_dir2_data_entry_t   *dep;
+       xfs_dir2_data_unused_t  *dup;
+       char                    *endptr;
+       int                     error;
+       xfs_dir2_free_t         *fblock;
+       xfs_dabuf_t             *fbp;
+       xfs_dir2_db_t           fdb;
+       int                     fi;
+       xfs_fsblock_t           firstblock;
+       xfs_bmap_free_t         flist;
+       int                     needlog;
+       int                     needscan;
+       int                     nres;
+       char                    *ptr;
+       xfs_trans_t             *tp;
+
+       /*
+        * NOTE(review): -2 is passed only for block 0; it appears to let
+        * the read succeed with bp == NULL when the block is unmapped,
+        * which is the case handled just below.
+        */
+       if (libxfs_da_read_buf(NULL, ip, da_bno, da_bno == 0 ? -2 : -1, &bp,
+                       XFS_DATA_FORK)) {
+               do_error("can't read block %u for directory inode %llu\n",
+                       da_bno, ino);
+               /* NOTREACHED */
+       }
+       if (da_bno == 0 && bp == NULL)
+               /*
+                * The block was punched out.
+                */
+               return;
+       ASSERT(bp);
+       /* locate the free block that carries the bests entry for this block */
+       dbno = XFS_DIR2_DA_TO_DB(mp, da_bno);
+       fdb = XFS_DIR2_DB_TO_FDB(mp, dbno);
+       if (libxfs_da_read_buf(NULL, ip, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+                       XFS_DATA_FORK)) {
+               do_error("can't read block %u for directory inode %llu\n",
+                       XFS_DIR2_DB_TO_DA(mp, fdb), ino);
+               /* NOTREACHED */
+       }
+       /*
+        * Work from a private copy: the real block is wiped below and
+        * then refilled from this copy.
+        */
+       data = malloc(mp->m_dirblksize);
+       if (!data) {
+               do_error(
+               "malloc failed in longform_dir2_rebuild_data (%u bytes)\n",
+                       mp->m_dirblksize);
+               exit(1);
+       }
+       bcopy(bp->data, data, mp->m_dirblksize);
+       ptr = (char *)data->u;
+       /* in block form the entries stop where the leaf entries begin */
+       if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data);
+               endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+       } else
+               endptr = (char *)data + mp->m_dirblksize;
+       fblock = fbp->data;
+       fi = XFS_DIR2_DB_TO_FDINDEX(mp, dbno);
+       /*
+        * First transaction: empty the on-disk data block and record its
+        * new best-free length in the free block.
+        */
+       tp = libxfs_trans_alloc(mp, 0);
+       error = libxfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
+               XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+       if (error)
+               res_failed(error);
+       libxfs_trans_ijoin(tp, ip, 0);
+       libxfs_trans_ihold(tp, ip);
+       /* both buffers are held so they remain usable after the commit */
+       libxfs_da_bjoin(tp, bp);
+       libxfs_da_bhold(tp, bp);
+       libxfs_da_bjoin(tp, fbp);
+       libxfs_da_bhold(tp, fbp);
+       XFS_BMAP_INIT(&flist, &firstblock);
+       needlog = needscan = 0;
+       /* clear the bestfree table, then free everything after the header */
+       bzero(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree,
+               sizeof(data->hdr.bestfree));
+       libxfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)sizeof(data->hdr),
+               mp->m_dirblksize - sizeof(data->hdr), &needlog, &needscan);
+       ASSERT(needscan == 0);
+       libxfs_dir2_data_log_header(tp, bp);
+       INT_SET(fblock->bests[fi], ARCH_CONVERT,
+               INT_GET(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree[0].length, ARCH_CONVERT));
+       libxfs_dir2_free_log_bests(tp, fbp, fi, fi);
+       libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+       libxfs_trans_commit(tp, 0, 0);
+
+       /*
+        * Walk the saved copy and re-create each live entry, one
+        * transaction per entry.
+        */
+       while (ptr < endptr) {
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               /* free regions are skipped by their recorded length */
+               if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+                       ptr += INT_GET(dup->length, ARCH_CONVERT);
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+               /* a leading '/' marks an entry that is not to be re-added */
+               if (dep->name[0] == '/')
+                       continue;
+               tp = libxfs_trans_alloc(mp, 0);
+               nres = XFS_CREATE_SPACE_RES(mp, dep->namelen);
+               error = libxfs_trans_reserve(tp, nres, XFS_CREATE_LOG_RES(mp), 0,
+                       XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+               if (error)
+                       res_failed(error);
+               libxfs_trans_ijoin(tp, ip, 0);
+               libxfs_trans_ihold(tp, ip);
+               libxfs_da_bjoin(tp, bp);
+               libxfs_da_bhold(tp, bp);
+               libxfs_da_bjoin(tp, fbp);
+               libxfs_da_bhold(tp, fbp);
+               XFS_BMAP_INIT(&flist, &firstblock);
+               error = dir_createname(mp, tp, ip, (char *)dep->name,
+                       dep->namelen, INT_GET(dep->inumber, ARCH_CONVERT),
+                       &firstblock, &flist, nres);
+               /*
+                * NOTE(review): a failure here is only caught by ASSERT;
+                * confirm whether builds with ASSERT disabled should
+                * report it instead.
+                */
+               ASSERT(error == 0);
+               libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
+               libxfs_trans_commit(tp, 0, 0);
+       }
+       libxfs_da_brelse(NULL, bp);
+       libxfs_da_brelse(NULL, fbp);
+       free(data);
+}
+
+/*
+ * Last step of a directory rebuild.
+ *
+ * An entry named "/" that refers to the directory's own inode number
+ * is created and then removed again, each in a transaction of its
+ * own.  Per the original author's note, stuffing "/" in and taking it
+ * back out forces the directory to end up in the right format.
+ *
+ *     mp      mount structure of the filesystem
+ *     ino     inode number of the directory (target of the "/" entry)
+ *     ip      in-core inode of the directory
+ *
+ * A failed log reservation is handed to res_failed(); the results of
+ * the create and remove calls are only checked with ASSERT.
+ */
+void
+longform_dir2_rebuild_finish(
+       xfs_mount_t             *mp,
+       xfs_ino_t               ino,
+       xfs_inode_t             *ip)
+{
+       xfs_trans_t             *trans;
+       xfs_bmap_free_t         free_list;
+       xfs_fsblock_t           first_fsb;
+       int                     space_res;
+       int                     did_commit;
+       int                     rc;
+
+       /* first transaction: add the "/" placeholder entry */
+       trans = libxfs_trans_alloc(mp, 0);
+       space_res = XFS_CREATE_SPACE_RES(mp, 1);
+       rc = libxfs_trans_reserve(trans, space_res, XFS_CREATE_LOG_RES(mp),
+                       0, XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+       if (rc != 0)
+               res_failed(rc);
+       libxfs_trans_ijoin(trans, ip, 0);
+       libxfs_trans_ihold(trans, ip);
+       XFS_BMAP_INIT(&free_list, &first_fsb);
+       rc = dir_createname(mp, trans, ip, "/", 1, ino, &first_fsb,
+                       &free_list, space_res);
+       ASSERT(rc == 0);
+       libxfs_bmap_finish(&trans, &free_list, first_fsb, &did_commit);
+       libxfs_trans_commit(trans, 0, 0);
+
+       /* could kill trailing empty data blocks here */
+
+       /* second transaction: take the placeholder entry out again */
+       trans = libxfs_trans_alloc(mp, 0);
+       space_res = XFS_REMOVE_SPACE_RES(mp);
+       rc = libxfs_trans_reserve(trans, space_res, XFS_REMOVE_LOG_RES(mp),
+                       0, XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+       if (rc != 0)
+               res_failed(rc);
+       libxfs_trans_ijoin(trans, ip, 0);
+       libxfs_trans_ihold(trans, ip);
+       XFS_BMAP_INIT(&free_list, &first_fsb);
+       rc = dir_removename(mp, trans, ip, "/", 1, ino, &first_fsb,
+                       &free_list, space_res);
+       ASSERT(rc == 0);
+       libxfs_bmap_finish(&trans, &free_list, first_fsb, &did_commit);
+       libxfs_trans_commit(trans, 0, 0);
+}
+
+/*
+ * Rebuild a version 2 directory from its data blocks.
+ *
+ *  1. Unless the directory is in single-block form, walk the blocks
+ *     starting at mp->m_dirleafblk and pass each one to
+ *     dir2_kill_block(), removing all the non-data blocks.
+ *  2. Re-initialize to (empty) node form with
+ *     longform_dir2_rebuild_setup().
+ *  3. Walk the data blocks (from mp->m_dirdatablk up to, but not
+ *     including, mp->m_dirleafblk) and reinsert each entry with
+ *     longform_dir2_rebuild_data().
+ *  4. Force the directory into the right format with
+ *     longform_dir2_rebuild_finish().
+ *
+ * *num_illegal is reset to zero once the rebuild is done.
+ */
+void
+longform_dir2_rebuild(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino,
+       xfs_inode_t     *ip,
+       int             *num_illegal,
+       freetab_t       *freetab,
+       int             isblock)
+{
+       xfs_dabuf_t     *blk_buf;
+       xfs_dablk_t     cur_bno;
+       xfs_fileoff_t   next_off;
+
+       do_warn("rebuilding directory inode %llu\n", ino);
+
+       /* step 1: nothing to kill when the directory is a single block */
+       cur_bno = mp->m_dirleafblk;
+       if (isblock)
+               next_off = NULLFILEOFF;
+       else
+               next_off = 0;
+       while (next_off != NULLFILEOFF) {
+               /* ask for the next mapped offset after this dir block */
+               next_off = cur_bno + mp->m_dirblkfsbs - 1;
+               if (libxfs_bmap_next_offset(NULL, ip, &next_off,
+                               XFS_DATA_FORK))
+                       break;
+               if (libxfs_da_get_buf(NULL, ip, cur_bno, -1, &blk_buf,
+                               XFS_DATA_FORK)) {
+                       do_error("can't get block %u for directory inode "
+                                "%llu\n",
+                               cur_bno, ino);
+                       /* NOTREACHED */
+               }
+               dir2_kill_block(mp, ip, cur_bno, blk_buf);
+               cur_bno = (xfs_dablk_t)next_off;
+       }
+
+       /* step 2 */
+       longform_dir2_rebuild_setup(mp, ino, ip, freetab);
+
+       /* step 3: reinsert the entries of every data block */
+       cur_bno = mp->m_dirdatablk;
+       next_off = 0;
+       while (cur_bno < mp->m_dirleafblk && next_off != NULLFILEOFF) {
+               next_off = cur_bno + mp->m_dirblkfsbs - 1;
+               if (libxfs_bmap_next_offset(NULL, ip, &next_off,
+                               XFS_DATA_FORK))
+                       break;
+               longform_dir2_rebuild_data(mp, ino, ip, cur_bno);
+               cur_bno = (xfs_dablk_t)next_off;
+       }
+
+       /* step 4 */
+       longform_dir2_rebuild_finish(mp, ino, ip);
+       *num_illegal = 0;
+}
+
+/*
+ * Check the entries of a long-form (non-shortform) version 2 directory
+ * and rebuild the directory if a problem was found.
+ *
+ * Every data block below mp->m_dirleafblk is read and handed to
+ * longform_dir2_entry_check_data() together with the name hash table
+ * and the free space table.  The hash table is then compared against
+ * the leaf information: the tail of the data block itself for block
+ * form, longform_dir2_check_leaf() for leaf form and
+ * longform_dir2_check_node() otherwise.  The directory is rebuilt with
+ * longform_dir2_rebuild() when no_modify is clear and either one of
+ * those checks failed, *num_illegal is non-zero, or
+ * dir2_is_badino(ino) is true.
+ *
+ *     mp              mount structure
+ *     ino             inode number of the directory
+ *     ip              in-core inode of the directory
+ *     num_illegal     in/out count of illegal entries
+ *     need_dot        set to 1 here; may be updated by the data check
+ *     stack           directory stack passed on to the data check
+ *     irec,
+ *     ino_offset      inode record and offset passed on to the data check
+ *
+ * Original note:
+ * succeeds or dies, inode never gets dirtied since all changes
+ * happen in file blocks.  the inode size and other core info
+ * is already correct, it's just the leaf entries that get altered.
+ * XXX above comment is wrong for v2 - need to see why it matters
+ */
+void
+longform_dir2_entry_check(xfs_mount_t  *mp,
+                       xfs_ino_t       ino,
+                       xfs_inode_t     *ip,
+                       int             *num_illegal,
+                       int             *need_dot,
+                       dir_stack_t     *stack,
+                       ino_tree_node_t *irec,
+                       int             ino_offset)
+{
+       xfs_dir2_block_t        *block;
+       xfs_dir2_leaf_entry_t   *blp;
+       /*
+        * Start out NULL: if the scan loop below never reads a block,
+        * the ASSERT(bp) in the block-form case must see a defined
+        * value rather than an indeterminate pointer.
+        */
+       xfs_dabuf_t             *bp = NULL;
+       xfs_dir2_block_tail_t   *btp;
+       xfs_dablk_t             da_bno;
+       freetab_t               *freetab;
+       dir_hash_tab_t          *hashtab;
+       int                     i;
+       int                     isblock;
+       int                     isleaf;
+       xfs_fileoff_t           next_da_bno;
+       int                     seeval;
+       int                     fixit;
+
+       *need_dot = 1;
+       /* one free space table slot per directory block of the file */
+       freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize));
+       if (!freetab) {
+               do_error(
+               "malloc failed in longform_dir2_entry_check (%u bytes)\n",
+                       FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize));
+               exit(1);
+       }
+       freetab->naents = ip->i_d.di_size / mp->m_dirblksize;
+       freetab->nents = 0;
+       for (i = 0; i < freetab->naents; i++) {
+               freetab->ents[i].v = NULLDATAOFF;
+               freetab->ents[i].s = 0;
+       }
+       libxfs_dir2_isblock(NULL, ip, &isblock);
+       libxfs_dir2_isleaf(NULL, ip, &isleaf);
+       hashtab = dir_hash_init(ip->i_d.di_size);
+       for (da_bno = 0, next_da_bno = 0;
+            next_da_bno != NULLFILEOFF && da_bno < mp->m_dirleafblk;
+            da_bno = (xfs_dablk_t)next_da_bno) {
+               /* find the next mapped offset after this directory block */
+               next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
+               if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
+                       break;
+               if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp,
+                               XFS_DATA_FORK)) {
+                       do_error("can't read block %u for directory inode "
+                                "%llu\n",
+                               da_bno, ino);
+                       /* NOTREACHED */
+               }
+               longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
+                       stack, irec, ino_offset, &bp, hashtab, &freetab, da_bno,
+                       isblock);
+               /* it releases the buffer unless isblock is set */
+       }
+       fixit = (*num_illegal != 0) || dir2_is_badino(ino);
+       if (isblock) {
+               /* block form: the leaf entries live in the data block's tail */
+               ASSERT(bp);
+               block = bp->data;
+               btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+               blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+               seeval = dir_hash_see_all(hashtab, blp, INT_GET(btp->count, ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT));
+               if (dir_hash_check(hashtab, ip, seeval))
+                       fixit |= 1;
+               libxfs_da_brelse(NULL, bp);
+       } else if (isleaf) {
+               fixit |= longform_dir2_check_leaf(mp, ip, hashtab, freetab);
+       } else {
+               fixit |= longform_dir2_check_node(mp, ip, hashtab, freetab);
+       }
+       dir_hash_done(hashtab);
+       if (!no_modify && fixit)
+               longform_dir2_rebuild(mp, ino, ip, num_illegal, freetab,
+                       isblock);
+       free(freetab);
+}
+
+/*
+ * shortform directory processing routines -- entry verification and
+ * bad entry deletion (pruning).
+ *
+ * Walks the entries of the shortform (version 1) directory held in the
+ * data fork of ip.  An entry is junked when it points to a free inode,
+ * to a directory that has already been reached, or to a directory
+ * whose recorded parent is not this directory.  In no_modify mode
+ * junking is only reported; otherwise the entry is cut out of the
+ * fork in place and the sizes are fixed up at the end.
+ *
+ *     mp                      mount structure
+ *     ino                     inode number of the directory being checked
+ *     ip                      in-core inode of that directory
+ *     ino_dirty               cleared on entry, set to 1 if the inode
+ *                             was changed
+ *     stack                   child directories still to be checked
+ *                             are pushed here
+ *     current_irec,
+ *     current_ino_offset      inode record and offset of this
+ *                             directory, used to bump its ref count
+ */
+void
+shortform_dir_entry_check(xfs_mount_t  *mp,
+                       xfs_ino_t       ino,
+                       xfs_inode_t     *ip,
+                       int             *ino_dirty,
+                       dir_stack_t     *stack,
+                       ino_tree_node_t *current_irec,
+                       int             current_ino_offset)
+{
+       xfs_ino_t               lino;
+       xfs_ino_t               parent;
+       xfs_dir_shortform_t     *sf;
+       xfs_dir_sf_entry_t      *sf_entry, *next_sfe, *tmp_sfe;
+       xfs_ifork_t             *ifp;
+       ino_tree_node_t         *irec;
+       int                     max_size;
+       int                     ino_offset;
+       int                     i;
+       int                     junkit;
+       int                     tmp_len;
+       int                     tmp_elen;
+       int                     bad_sfnamelen;
+       int                     namelen;
+       int                     bytes_deleted;
+       char                    fname[MAXNAMELEN + 1];
+
+       ifp = &ip->i_df;
+       sf = (xfs_dir_shortform_t *) ifp->if_u1.if_data;
+       *ino_dirty = 0;
+       bytes_deleted = 0;
+
+       max_size = ifp->if_bytes;
+       ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+
+       /*
+        * no '.' entry in shortform dirs, just bump up ref count by 1
+        * '..' was already (or will be) accounted for and checked when
+        * the directory is reached or will be taken care of when the
+        * directory is moved to orphanage.
+        */
+       add_inode_ref(current_irec, current_ino_offset);
+
+       /*
+        * now run through entries, stop at first bad entry, don't need
+        * to skip over '..' since that's encoded in its own field and
+        * no need to worry about '.' since it doesn't exist.
+        */
+       sf_entry = next_sfe = &sf->list[0];
+       if (sf == NULL) { 
+               junkit = 1;
+               do_warn("shortform dir inode %llu has null data entries \n", ino);
+
+               }
+       else {
+          for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && max_size >
+                                       (__psint_t)next_sfe - (__psint_t)sf;
+                       sf_entry = next_sfe, i++)  {
+               junkit = 0;
+               bad_sfnamelen = 0;
+               tmp_sfe = NULL;
+
+               XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+
+               namelen = sf_entry->namelen;
+
+               ASSERT(no_modify || namelen > 0);
+
+               if (no_modify && namelen == 0)  {
+                       /*
+                        * if we're really lucky, this is
+                        * the last entry in which case we
+                        * can use the dir size to set the
+                        * namelen value.  otherwise, forget
+                        * it because we're not going to be
+                        * able to find the next entry.
+                        */
+                       bad_sfnamelen = 1;
+
+                       if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1)  {
+                               namelen = ip->i_d.di_size -
+                                       ((__psint_t) &sf_entry->name[0] -
+                                        (__psint_t) sf);
+                       } else  {
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing loop
+                                */
+                               break;
+                       }
+               } else if (no_modify && (__psint_t) sf_entry - (__psint_t) sf
+                               + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+                               > ip->i_d.di_size)  {
+                       /* entry claims to extend past the end of the dir */
+                       bad_sfnamelen = 1;
+
+                       if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1)  {
+                               namelen = ip->i_d.di_size -
+                                       ((__psint_t) &sf_entry->name[0] -
+                                        (__psint_t) sf);
+                       } else  {
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing loop
+                                */
+                               break;
+                       }
+               }
+
+               /* NUL-terminated copy of the name for the messages below */
+               bcopy(sf_entry->name, fname, sf_entry->namelen);
+               fname[sf_entry->namelen] = '\0';
+
+               ASSERT(no_modify || lino != NULLFSINO);
+               ASSERT(no_modify || !verify_inum(mp, lino));
+
+               /*
+                * special case the "lost+found" entry if it's pointing
+                * to where we think lost+found should be.  if that's
+                * the case, that's the one we created in phase 6.
+                * just skip it.  no need to process it and its ..
+                * link is already accounted for.  Also skip entries
+                * with bogus inode numbers if we're in no modify mode.
+                */
+
+               if ((lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0)
+                               || (no_modify && verify_inum(mp, lino)))  {
+                       next_sfe = (xfs_dir_sf_entry_t *)
+                               ((__psint_t) sf_entry +
+                               XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry));
+                       continue;
+               }
+
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+                                       XFS_INO_TO_AGINO(mp, lino));
+
+               if (irec == NULL && no_modify)  {
+                       do_warn(
+"entry \"%s\" in shortform dir %llu references non-existent ino %llu\n",
+                               fname, ino, lino);
+                       do_warn("would junk entry\n");
+                       /*
+                        * step past this entry before continuing.
+                        * the loop increment reloads sf_entry from
+                        * next_sfe, so without this the same entry
+                        * would be looked at (and reported) again on
+                        * every remaining iteration and the entries
+                        * behind it would never be checked.
+                        */
+                       next_sfe = (xfs_dir_sf_entry_t *)
+                               ((__psint_t) sf_entry
+                               + ((!bad_sfnamelen)
+                                       ? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+                                       : sizeof(xfs_dir_sf_entry_t) - 1
+                                               + namelen));
+                       continue;
+               }
+
+               ASSERT(irec != NULL);
+
+               ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+               /*
+                * if it's a free inode, blow out the entry.
+                * by now, any inode that we think is free
+                * really is free.
+                */
+               if (is_inode_free(irec, ino_offset))  {
+                       /*
+                        * don't complain if this entry points to the old
+                        * and now-free lost+found inode
+                        */
+                       if (verbose || no_modify || lino != old_orphanage_ino)
+                               do_warn(
+       "entry \"%s\" in shortform dir inode %llu points to free inode %llu\n",
+                                       fname, ino, lino);
+
+                       if (!no_modify)  {
+                               junkit = 1;
+                       } else  {
+                               do_warn("would junk entry \"%s\"\n",
+                                       fname);
+                       }
+               } else if (!inode_isadir(irec, ino_offset))  {
+                       /*
+                        * check easy case first, regular inode, just bump
+                        * the link count and continue
+                        */
+                       add_inode_reached(irec, ino_offset);
+
+                       next_sfe = (xfs_dir_sf_entry_t *)
+                               ((__psint_t) sf_entry +
+                               XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry));
+                       continue;
+               } else  {
+                       parent = get_inode_parent(irec, ino_offset);
+
+                       /*
+                        * bump up the link counts in parent and child.
+                        * directory but if the link doesn't agree with
+                        * the .. in the child, blow out the entry
+                        */
+                       if (is_inode_reached(irec, ino_offset))  {
+                               junkit = 1;
+                               do_warn(
+       "entry \"%s\" in dir %llu references already connected dir ino %llu,\n",
+                                       fname, ino, lino);
+                       } else if (parent == ino)  {
+                               add_inode_reached(irec, ino_offset);
+                               add_inode_ref(current_irec, current_ino_offset);
+
+                               if (!is_inode_refchecked(lino, irec,
+                                               ino_offset))
+                                       push_dir(stack, lino);
+                       } else  {
+                               junkit = 1;
+                               do_warn(
+"entry \"%s\" in dir %llu not consistent with .. value (%llu) in dir ino %llu,\n",
+                                       fname, ino, parent, lino);
+                       }
+               }
+
+               if (junkit)  {
+                       if (!no_modify)  {
+                               /*
+                                * cut the entry out: slide everything
+                                * behind it down over it and zero the
+                                * bytes vacated at the end
+                                */
+                               tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+                               tmp_sfe = (xfs_dir_sf_entry_t *)
+                                       ((__psint_t) sf_entry + tmp_elen);
+                               tmp_len = max_size - ((__psint_t) tmp_sfe
+                                                       - (__psint_t) sf);
+                               max_size -= tmp_elen;
+                               bytes_deleted += tmp_elen;
+
+                               memmove(sf_entry, tmp_sfe, tmp_len);
+
+                               INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+                               bzero((void *) ((__psint_t) sf_entry + tmp_len),
+                                               tmp_elen);
+
+                               /*
+                                * set the tmp value to the current
+                                * pointer so we'll process the entry
+                                * we just moved up
+                                */
+                               tmp_sfe = sf_entry;
+
+                               /*
+                                * WARNING:  drop the index i by one
+                                * so it matches the decremented count for
+                                * accurate comparisons in the loop test
+                                */
+                               i--;
+
+                               *ino_dirty = 1;
+
+                               /*
+                                * report the directory (ino) the entry
+                                * was removed from, not the inode the
+                                * entry pointed at
+                                */
+                               if (verbose || lino != old_orphanage_ino)
+                                       do_warn(
+                       "junking entry \"%s\" in directory inode %llu\n",
+                                               fname, ino);
+                       } else  {
+                               do_warn("would junk entry \"%s\"\n", fname);
+                       }
+               }
+
+               /*
+                * go onto next entry unless we've just junked an
+                * entry in which the current entry pointer points
+                * to an unprocessed entry.  have to take into entries
+                * with bad namelen into account in no modify mode since we
+                * calculate size based on next_sfe.
+                */
+               ASSERT(no_modify || bad_sfnamelen == 0);
+
+               next_sfe = (tmp_sfe == NULL)
+                       ? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+                               + ((!bad_sfnamelen)
+                                       ? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+                                       : sizeof(xfs_dir_sf_entry_t) - 1
+                                               + namelen))
+                       : tmp_sfe;
+           }
+       }
+
+       /*
+        * sync up sizes if required
+        */
+       if (*ino_dirty)  {
+               ASSERT(bytes_deleted > 0);
+               ASSERT(!no_modify);
+               libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
+               ip->i_d.di_size -= bytes_deleted;
+       }
+
+       if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+               ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+                               ((__psint_t) next_sfe - (__psint_t) sf));
+               ip->i_d.di_size = (xfs_fsize_t)
+                               ((__psint_t) next_sfe - (__psint_t) sf);
+               do_warn(
+               "setting size to %lld bytes to reflect junked entries\n",
+                               ip->i_d.di_size);
+               *ino_dirty = 1;
+       }
+}
+
+/*
+ * Remove every entry whose name starts with '/' from the shortform
+ * (version 1) directory held in the data fork of ip.  Nothing is
+ * removed when no_modify is set.  After the scan the data fork is
+ * shrunk by the number of bytes removed and the inode size is brought
+ * in line with the fork size.
+ *
+ * mp and ino are not used by the function body.
+ */
+/* ARGSUSED */
+void
+prune_sf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip)
+{
+                               /* REFERENCED */
+       xfs_ino_t               target_ino;
+       xfs_ifork_t             *fork;
+       xfs_dir_shortform_t     *dir;
+       xfs_dir_sf_entry_t      *cur, *next, *after;
+       char                    name_buf[MAXNAMELEN + 1];
+       int                     bytes_left;
+       int                     removed;
+       int                     ent_len;
+       int                     tail_len;
+       int                     idx;
+
+       fork = &ip->i_df;
+       dir = (xfs_dir_shortform_t *) fork->if_u1.if_data;
+       removed = 0;
+
+       bytes_left = fork->if_bytes;
+       ASSERT(ip->i_d.di_size <= fork->if_bytes);
+
+       /*
+        * visit each entry in turn; the entry count in the header is
+        * re-read on every pass because removals decrement it
+        */
+       cur = next = &dir->list[0];
+       idx = 0;
+       while (idx < INT_GET(dir->hdr.count, ARCH_CONVERT) &&
+              bytes_left > (__psint_t)next - (__psint_t)dir) {
+               XFS_DIR_SF_GET_DIRINO_ARCH(&cur->inumber, &target_ino,
+                       ARCH_CONVERT);
+
+               bcopy(cur->name, name_buf, cur->namelen);
+               name_buf[cur->namelen] = '\0';
+
+               if (cur->name[0] == '/' && !no_modify) {
+                       /*
+                        * slide everything behind this entry down over
+                        * it and zero the bytes vacated at the end
+                        */
+                       ent_len = XFS_DIR_SF_ENTSIZE_BYENTRY(cur);
+                       after = (xfs_dir_sf_entry_t *)
+                               ((__psint_t) cur + ent_len);
+                       tail_len = bytes_left -
+                               ((__psint_t) after - (__psint_t) dir);
+                       bytes_left -= ent_len;
+                       removed += ent_len;
+
+                       memmove(cur, after, tail_len);
+
+                       INT_MOD(dir->hdr.count, ARCH_CONVERT, -1);
+                       bzero((void *) ((__psint_t) cur + tail_len), ent_len);
+
+                       /*
+                        * stay where we are so the entry that was
+                        * just moved up gets looked at next
+                        */
+                       next = cur;
+
+                       /*
+                        * WARNING:  the index goes back by one so that
+                        * it keeps matching the decremented count in
+                        * the loop test
+                        */
+                       idx--;
+               } else {
+                       /* nothing removed, step over the entry */
+                       next = (xfs_dir_sf_entry_t *) ((__psint_t) cur +
+                               XFS_DIR_SF_ENTSIZE_BYENTRY(cur));
+               }
+
+               cur = next;
+               idx++;
+       }
+
+       /*
+        * give back the space taken by the removed entries
+        */
+       if (removed > 0)  {
+               libxfs_idata_realloc(ip, -removed, XFS_DATA_FORK);
+               ip->i_d.di_size -= removed;
+       }
+
+       /*
+        * inode size and fork size still disagree: use the end of the
+        * scan as the directory size
+        */
+       if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+               ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+                               ((__psint_t) next - (__psint_t) dir));
+               ip->i_d.di_size = (xfs_fsize_t)
+                               ((__psint_t) next - (__psint_t) dir);
+               do_warn(
+               "setting size to %lld bytes to reflect junked entries\n",
+                               ip->i_d.di_size);
+       }
+}
+
+/*
+ * shortform directory v2 processing routines -- entry verification and
+ * bad entry deletion (pruning).
+ */
+void
+shortform_dir2_entry_check(xfs_mount_t *mp,
+                       xfs_ino_t       ino,
+                       xfs_inode_t     *ip,
+                       int             *ino_dirty,
+                       dir_stack_t     *stack,
+                       ino_tree_node_t *current_irec,
+                       int             current_ino_offset)
+{
+       xfs_ino_t               lino;
+       xfs_ino_t               parent;
+       xfs_dir2_sf_t           *sfp;
+       xfs_dir2_sf_entry_t     *sfep, *next_sfep, *tmp_sfep;
+       xfs_ifork_t             *ifp;
+       ino_tree_node_t         *irec;
+       int                     max_size;
+       int                     ino_offset;
+       int                     i;
+       int                     junkit;
+       int                     tmp_len;
+       int                     tmp_elen;
+       int                     bad_sfnamelen;
+       int                     namelen;
+       int                     bytes_deleted;
+       char                    fname[MAXNAMELEN + 1];
+       int                     i8;
+
+       ifp = &ip->i_df;
+       sfp = (xfs_dir2_sf_t *) ifp->if_u1.if_data;
+       *ino_dirty = 0;
+       bytes_deleted = i8 = 0;
+
+       max_size = ifp->if_bytes;
+       ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+
+       /*
+        * no '.' entry in shortform dirs, just bump up ref count by 1
+        * '..' was already (or will be) accounted for and checked when
+        * the directory is reached or will be taken care of when the
+        * directory is moved to orphanage.
+        */
+       add_inode_ref(current_irec, current_ino_offset);
+
+       /*
+        * now run through entries, stop at first bad entry, don't need
+        * to skip over '..' since that's encoded in its own field and
+        * no need to worry about '.' since it doesn't exist.
+        */
+       sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp);
+
+       for (i = 0; i < INT_GET(sfp->hdr.count, ARCH_CONVERT) && max_size >
+                                       (__psint_t)next_sfep - (__psint_t)sfp;
+                       sfep = next_sfep, i++)  {
+               junkit = 0;
+               bad_sfnamelen = 0;
+               tmp_sfep = NULL;
+
+               lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT);
+
+               namelen = sfep->namelen;
+
+               ASSERT(no_modify || namelen > 0);
+
+               if (no_modify && namelen == 0)  {
+                       /*
+                        * if we're really lucky, this is
+                        * the last entry in which case we
+                        * can use the dir size to set the
+                        * namelen value.  otherwise, forget
+                        * it because we're not going to be
+                        * able to find the next entry.
+                        */
+                       bad_sfnamelen = 1;
+
+                       if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1)  {
+                               namelen = ip->i_d.di_size -
+                                       ((__psint_t) &sfep->name[0] -
+                                        (__psint_t) sfp);
+                       } else  {
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing loop
+                                */
+                               break;
+                       }
+               } else if (no_modify && (__psint_t) sfep - (__psint_t) sfp +
+                               + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+                               > ip->i_d.di_size)  {
+                       bad_sfnamelen = 1;
+
+                       if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1)  {
+                               namelen = ip->i_d.di_size -
+                                       ((__psint_t) &sfep->name[0] -
+                                        (__psint_t) sfp);
+                       } else  {
+                               /*
+                                * don't process the rest of the directory,
+                                * break out of processing loop
+                                */
+                               break;
+                       }
+               }
+
+               bcopy(sfep->name, fname, sfep->namelen);
+               fname[sfep->namelen] = '\0';
+
+               ASSERT(no_modify || (lino != NULLFSINO && lino != 0));
+               ASSERT(no_modify || !verify_inum(mp, lino));
+
+               /*
+                * special case the "lost+found" entry if it's pointing
+                * to where we think lost+found should be.  if that's
+                * the case, that's the one we created in phase 6.
+                * just skip it.  no need to process it and its ..
+                * link is already accounted for.  Also skip entries
+                * with bogus inode numbers if we're in no modify mode.
+                */
+
+               if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0
+                               || no_modify && verify_inum(mp, lino))  {
+                       next_sfep = (xfs_dir2_sf_entry_t *)
+                               ((__psint_t) sfep +
+                               XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep));
+                       continue;
+               }
+
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+                                       XFS_INO_TO_AGINO(mp, lino));
+
+               if (irec == NULL && no_modify)  {
+                       do_warn("entry \"%s\" in shortform directory %llu "
+                               "references non-existent inode %llu\n",
+                               fname, ino, lino);
+                       do_warn("would junk entry\n");
+                       continue;
+               }
+
+               ASSERT(irec != NULL);
+
+               ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum;
+
+               /*
+                * if it's a free inode, blow out the entry.
+                * by now, any inode that we think is free
+                * really is free.
+                */
+               if (is_inode_free(irec, ino_offset))  {
+                       /*
+                        * don't complain if this entry points to the old
+                        * and now-free lost+found inode
+                        */
+                       if (verbose || no_modify || lino != old_orphanage_ino)
+                               do_warn("entry \"%s\" in shortform directory "
+                                       "inode %llu points to free inode "
+                                       "%llu\n",
+                                       fname, ino, lino);
+
+                       if (!no_modify)  {
+                               junkit = 1;
+                       } else  {
+                               do_warn("would junk entry \"%s\"\n",
+                                       fname);
+                       }
+               } else if (!inode_isadir(irec, ino_offset))  {
+                       /*
+                        * check easy case first, regular inode, just bump
+                        * the link count and continue
+                        */
+                       add_inode_reached(irec, ino_offset);
+
+                       next_sfep = (xfs_dir2_sf_entry_t *)
+                               ((__psint_t) sfep +
+                               XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep));
+                       continue;
+               } else  {
+                       parent = get_inode_parent(irec, ino_offset);
+
+                       /*
+                        * bump up the link counts in parent and child.
+                        * directory but if the link doesn't agree with
+                        * the .. in the child, blow out the entry
+                        */
+                       if (is_inode_reached(irec, ino_offset))  {
+                               junkit = 1;
+                               do_warn("entry \"%s\" in directory inode %llu "
+                                       "references already connected inode "
+                                       "%llu,\n",
+                                       fname, ino, lino);
+                       } else if (parent == ino)  {
+                               add_inode_reached(irec, ino_offset);
+                               add_inode_ref(current_irec, current_ino_offset);
+
+                               if (!is_inode_refchecked(lino, irec,
+                                               ino_offset))
+                                       push_dir(stack, lino);
+                       } else  {
+                               junkit = 1;
+                               do_warn("entry \"%s\" in directory inode %llu "
+                                       "not consistent with .. value (%llu) "
+                                       "in inode %llu,\n",
+                                       fname, ino, parent, lino);
+                       }
+               }
+
+               if (junkit)  {
+                       if (!no_modify)  {
+                               tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep);
+                               tmp_sfep = (xfs_dir2_sf_entry_t *)
+                                       ((__psint_t) sfep + tmp_elen);
+                               tmp_len = max_size - ((__psint_t) tmp_sfep
+                                                       - (__psint_t) sfp);
+                               max_size -= tmp_elen;
+                               bytes_deleted += tmp_elen;
+
+                               memmove(sfep, tmp_sfep, tmp_len);
+
+                               INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1);
+                               bzero((void *) ((__psint_t) sfep + tmp_len),
+                                               tmp_elen);
+
+                               /*
+                                * set the tmp value to the current
+                                * pointer so we'll process the entry
+                                * we just moved up
+                                */
+                               tmp_sfep = sfep;
+
+                               /*
+                                * WARNING:  drop the index i by one
+                                * so it matches the decremented count for
+                                * accurate comparisons in the loop test
+                                */
+                               i--;
+
+                               *ino_dirty = 1;
+
+                               if (verbose || lino != old_orphanage_ino)
+                                       do_warn("junking entry \"%s\" in "
+                                               "directory inode %llu\n",
+                                               fname, lino);
+                       } else  {
+                               do_warn("would junk entry \"%s\"\n", fname);
+                       }
+               } else if (lino > XFS_DIR2_MAX_SHORT_INUM)
+                       i8++;
+
+               /*
+                * go onto next entry unless we've just junked an
+                * entry in which the current entry pointer points
+                * to an unprocessed entry.  have to take into entries
+                * with bad namelen into account in no modify mode since we
+                * calculate size based on next_sfep.
+                */
+               ASSERT(no_modify || bad_sfnamelen == 0);
+
+               next_sfep = (tmp_sfep == NULL)
+                       ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep
+                               + ((!bad_sfnamelen)
+                                       ? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)
+                                       : XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, namelen)))
+                       : tmp_sfep;
+       }
+
+       if (sfp->hdr.i8count != i8) {
+               if (no_modify) {
+                       do_warn("would fix i8count in inode %llu\n", ino);
+               } else {
+                       if (i8 == 0) {
+                               tmp_sfep = next_sfep;
+                               process_sf_dir2_fixi8(sfp, &tmp_sfep);
+                               bytes_deleted +=
+                                       (__psint_t)next_sfep -
+                                       (__psint_t)tmp_sfep;
+                               next_sfep = tmp_sfep;
+                       } else
+                               sfp->hdr.i8count = i8;
+                       *ino_dirty = 1;
+                       do_warn("fixing i8count in inode %llu\n", ino);
+               }
+       }
+
+       /*
+        * sync up sizes if required
+        */
+       if (*ino_dirty)  {
+               ASSERT(bytes_deleted > 0);
+               ASSERT(!no_modify);
+               libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
+               ip->i_d.di_size -= bytes_deleted;
+       }
+
+       if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+               ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
+                               ((__psint_t) next_sfep - (__psint_t) sfp));
+               ip->i_d.di_size = (xfs_fsize_t)
+                               ((__psint_t) next_sfep - (__psint_t) sfp);
+               do_warn("setting size to %lld bytes to reflect junked "
+                       "entries\n",
+                       ip->i_d.di_size);
+               *ino_dirty = 1;
+       }
+}
+
+/*
+ * processes all directories reachable via the inodes on the stack.
+ *
+ * inode numbers are popped until pop_dir() hands back NULLFSINO.
+ * each directory is marked refchecked and its entries are checked
+ * by the entry-check routine matching its data fork format and
+ * directory version; those routines are handed the stack as well.
+ * when not in no_modify mode, the lost+found entry and the root
+ * ".." entry are (re)created in the root directory if needed,
+ * illegal longform entries are pruned and a missing "." entry
+ * is recreated.
+ *
+ * returns nothing -- problems are reported via do_warn()/do_error().
+ */
+void
+process_dirstack(xfs_mount_t *mp, dir_stack_t *stack)
+{
+       xfs_bmap_free_t         flist;
+       xfs_fsblock_t           first;
+       xfs_ino_t               ino;
+       xfs_inode_t             *ip;
+       xfs_trans_t             *tp;
+       xfs_dahash_t            hashval;
+       ino_tree_node_t         *irec;
+       int                     ino_offset, need_dot, committed;
+       int                     dirty, num_illegal, error, nres;
+
+       /*
+        * pull directory inode # off directory stack
+        *
+        * open up directory inode, check all entries,
+        * then prune any remaining illegal directory
+        * entries (prune_lf_dir_entry/prune_sf_dir_entry).
+        */
+
+       while ((ino = pop_dir(stack)) != NULLFSINO)  {
+               irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino),
+                                       XFS_INO_TO_AGINO(mp, ino));
+               ASSERT(irec != NULL);
+
+               ino_offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum;
+
+               ASSERT(!is_inode_refchecked(ino, irec, ino_offset));
+
+               error = libxfs_iget(mp, NULL, ino, 0, &ip, 0);
+               if (error)  {
+                       if (!no_modify)  {
+                               do_error("couldn't map inode %llu, err = %d\n",
+                                       ino, error);
+                       } else  {
+                               do_warn("couldn't map inode %llu, err = %d\n",
+                                       ino, error);
+                               /*
+                                * see below for what we're doing if this
+                                * is root.  Why do we need to do this here?
+                                * to ensure that the root doesn't show up
+                                * as being disconnected in the no_modify case.
+                                */
+                               if (mp->m_sb.sb_rootino == ino)  {
+                                       add_inode_reached(irec, ino_offset);
+                                       add_inode_ref(irec, ino_offset);
+                               }
+                       }
+
+                       /*
+                        * use this inode's own offset within its chunk
+                        * record, just like the success path below, so
+                        * the flag lands on this inode and not on the
+                        * first inode of the chunk.
+                        */
+                       add_inode_refchecked(ino, irec, ino_offset);
+                       continue;
+               }
+
+               need_dot = dirty = num_illegal = 0;
+
+               if (mp->m_sb.sb_rootino == ino)  {
+                       /*
+                        * mark root inode reached and bump up
+                        * link count for root inode to account
+                        * for '..' entry since the root inode is
+                        * never reached by a parent.  we know
+                        * that root's '..' is always good --
+                        * guaranteed by phase 3 and/or below.
+                        */
+                       add_inode_reached(irec, ino_offset);
+                       /*
+                        * account for link for the orphanage
+                        * "lost+found".  if we're running in
+                        * modify mode and it already existed,
+                        * we deleted it so its '..' reference
+                        * never got counted.  so add it here if
+                        * we're going to create lost+found.
+                        *
+                        * if we're running in no_modify mode,
+                        * we never deleted lost+found and we're
+                        * not going to create it so do nothing.
+                        *
+                        * either way, the counts will match when
+                        * we look at the root inode's nlinks
+                        * field and compare that to our incore
+                        * count in phase 7.
+                        */
+                       if (!no_modify)
+                               add_inode_ref(irec, ino_offset);
+               }
+
+               add_inode_refchecked(ino, irec, ino_offset);
+
+               /*
+                * look for bogus entries
+                */
+               switch (ip->i_d.di_format)  {
+               case XFS_DINODE_FMT_EXTENTS:
+               case XFS_DINODE_FMT_BTREE:
+                       /*
+                        * also check for missing '.' in longform dirs.
+                        * missing .. entries are added if required when
+                        * the directory is connected to lost+found. but
+                        * we need to create '.' entries here.
+                        */
+                       if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+                               longform_dir2_entry_check(mp, ino, ip,
+                                                       &num_illegal, &need_dot,
+                                                       stack, irec,
+                                                       ino_offset);
+                       else
+                               longform_dir_entry_check(mp, ino, ip,
+                                                       &num_illegal, &need_dot,
+                                                       stack, irec,
+                                                       ino_offset);
+                       break;
+               case XFS_DINODE_FMT_LOCAL:
+                       tp = libxfs_trans_alloc(mp, 0);
+                       /*
+                        * using the remove reservation is overkill
+                        * since at most we'll only need to log the
+                        * inode but it's easier than wedging a
+                        * new define in ourselves.
+                        */
+                       nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp);
+                       error = libxfs_trans_reserve(tp, nres,
+                                       XFS_REMOVE_LOG_RES(mp), 0,
+                                       XFS_TRANS_PERM_LOG_RES,
+                                       XFS_REMOVE_LOG_COUNT);
+                       if (error)
+                               res_failed(error);
+
+                       libxfs_trans_ijoin(tp, ip, 0);
+                       libxfs_trans_ihold(tp, ip);
+
+                       if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+                               shortform_dir2_entry_check(mp, ino, ip, &dirty,
+                                                       stack, irec,
+                                                       ino_offset);
+                       else
+                               shortform_dir_entry_check(mp, ino, ip, &dirty,
+                                                       stack, irec,
+                                                       ino_offset);
+
+                       /*
+                        * the inode may only have been dirtied if
+                        * we're allowed to modify the filesystem
+                        */
+                       ASSERT(dirty == 0 || (dirty && !no_modify));
+                       if (dirty)  {
+                               libxfs_trans_log_inode(tp, ip,
+                                       XFS_ILOG_CORE | XFS_ILOG_DDATA);
+                               libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+                                               |XFS_TRANS_SYNC, 0);
+                       } else  {
+                               libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               hashval = 0;
+
+               /*
+                * put the orphanage entry into the root directory if
+                * that hasn't been done yet (orphanage_entered is clear)
+                */
+               if (!no_modify && !orphanage_entered &&
+                   ino == mp->m_sb.sb_rootino) {
+                       do_warn("re-entering %s into root directory\n",
+                               ORPHANAGE);
+                       tp = libxfs_trans_alloc(mp, 0);
+                       nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE));
+                       error = libxfs_trans_reserve(tp, nres,
+                                       XFS_MKDIR_LOG_RES(mp), 0,
+                                       XFS_TRANS_PERM_LOG_RES,
+                                       XFS_MKDIR_LOG_COUNT);
+                       if (error)
+                               res_failed(error);
+                       libxfs_trans_ijoin(tp, ip, 0);
+                       libxfs_trans_ihold(tp, ip);
+                       XFS_BMAP_INIT(&flist, &first);
+                       error = dir_createname(mp, tp, ip, ORPHANAGE,
+                                               strlen(ORPHANAGE),
+                                               orphanage_ino, &first, &flist,
+                                               nres);
+                       if (error)
+                               do_error("can't make %s entry in root inode "
+                                        "%llu, createname error %d\n",
+                                       ORPHANAGE, ino, error);
+                       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+                       error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+                       ASSERT(error == 0);
+                       libxfs_trans_commit(tp,
+                               XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_SYNC, 0);
+                       orphanage_entered = 1;
+               }
+
+               /*
+                * if we have to create a .. for /, do it now *before*
+                * we delete the bogus entries, otherwise the directory
+                * could transform into a shortform dir which would
+                * probably cause the simulation to choke.  Even
+                * if the illegal entries get shifted around, it's ok
+                * because the entries are structurally intact and
+                * in hash-value order so the simulation won't get confused
+                * if it has to move them around.
+                */
+               if (!no_modify && need_root_dotdot &&
+                               ino == mp->m_sb.sb_rootino)  {
+                       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+
+                       do_warn("recreating root directory .. entry\n");
+
+                       tp = libxfs_trans_alloc(mp, 0);
+                       ASSERT(tp != NULL);
+
+                       nres = XFS_MKDIR_SPACE_RES(mp, 2);
+                       error = libxfs_trans_reserve(tp, nres,
+                                       XFS_MKDIR_LOG_RES(mp),
+                                       0,
+                                       XFS_TRANS_PERM_LOG_RES,
+                                       XFS_MKDIR_LOG_COUNT);
+
+                       if (error)
+                               res_failed(error);
+
+                       libxfs_trans_ijoin(tp, ip, 0);
+                       libxfs_trans_ihold(tp, ip);
+
+                       XFS_BMAP_INIT(&flist, &first);
+
+                       error = dir_createname(mp, tp, ip, "..", 2,
+                                       ip->i_ino, &first, &flist, nres);
+                       if (error)
+                               do_error(
+"can't make \"..\" entry in root inode %llu, createname error %d\n",
+                                       ino, error);
+
+                       libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+                       error = libxfs_bmap_finish(&tp, &flist, first,
+                                       &committed);
+                       ASSERT(error == 0);
+                       libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+                                       |XFS_TRANS_SYNC, 0);
+
+                       need_root_dotdot = 0;
+               } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino)  {
+                       do_warn("would recreate root directory .. entry\n");
+               }
+
+               /*
+                * delete any illegal entries -- which should only exist
+                * if the directory is a longform directory.  bogus
+                * shortform directory entries were deleted in phase 4.
+                */
+               if (!no_modify && num_illegal > 0)  {
+                       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+                       ASSERT(!XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
+
+                       while (num_illegal > 0 && ip->i_d.di_format !=
+                                       XFS_DINODE_FMT_LOCAL)  {
+                               prune_lf_dir_entry(mp, ino, ip, &hashval);
+                               num_illegal--;
+                       }
+
+                       /*
+                        * handle case where we've deleted so many
+                        * entries that the directory has changed from
+                        * a longform to a shortform directory.  have
+                        * to allocate a transaction since we're working
+                        * with the incore data fork.
+                        */
+                       if (num_illegal > 0)  {
+                               ASSERT(ip->i_d.di_format ==
+                                       XFS_DINODE_FMT_LOCAL);
+                               tp = libxfs_trans_alloc(mp, 0);
+                               /*
+                                * using the remove reservation is overkill
+                                * since at most we'll only need to log the
+                                * inode but it's easier than wedging a
+                                * new define in ourselves.  10 block fs
+                                * space reservation is also overkill but
+                                * what the heck...
+                                */
+                               nres = XFS_REMOVE_SPACE_RES(mp);
+                               error = libxfs_trans_reserve(tp, nres,
+                                               XFS_REMOVE_LOG_RES(mp), 0,
+                                               XFS_TRANS_PERM_LOG_RES,
+                                               XFS_REMOVE_LOG_COUNT);
+                               if (error)
+                                       res_failed(error);
+
+                               libxfs_trans_ijoin(tp, ip, 0);
+                               libxfs_trans_ihold(tp, ip);
+
+                               prune_sf_dir_entry(mp, ino, ip);
+
+                               libxfs_trans_log_inode(tp, ip,
+                                               XFS_ILOG_CORE | XFS_ILOG_DDATA);
+                               ASSERT(error == 0);
+                               libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+                                               |XFS_TRANS_SYNC, 0);
+                       }
+               }
+
+               /*
+                * if we need to create the '.' entry, do so only if
+                * the directory is a longform dir.  if it's been
+                * turned into a shortform dir, then the inode is ok
+                * since shortform dirs have no '.' entry and the inode
+                * has already been committed by prune_lf_dir_entry().
+                */
+               if (need_dot)  {
+                       /*
+                        * bump up our link count but don't
+                        * bump up the inode link count.  chances
+                        * are good that even though we lost '.'
+                        * the inode link counts reflect '.' so
+                        * leave the inode link count alone and if
+                        * it turns out to be wrong, we'll catch
+                        * that in phase 7.
+                        */
+                       add_inode_ref(irec, ino_offset);
+
+                       if (no_modify)  {
+                               do_warn(
+       "would create missing \".\" entry in dir ino %llu\n",
+                                       ino);
+                       } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)  {
+                               /*
+                                * need to create . entry in longform dir.
+                                */
+                               do_warn(
+       "creating missing \".\" entry in dir ino %llu\n",
+                                       ino);
+
+                               tp = libxfs_trans_alloc(mp, 0);
+                               ASSERT(tp != NULL);
+
+                               nres = XFS_MKDIR_SPACE_RES(mp, 1);
+                               error = libxfs_trans_reserve(tp, nres,
+                                               XFS_MKDIR_LOG_RES(mp),
+                                               0,
+                                               XFS_TRANS_PERM_LOG_RES,
+                                               XFS_MKDIR_LOG_COUNT);
+
+                               if (error)
+                                       res_failed(error);
+
+                               libxfs_trans_ijoin(tp, ip, 0);
+                               libxfs_trans_ihold(tp, ip);
+
+                               XFS_BMAP_INIT(&flist, &first);
+
+                               error = dir_createname(mp, tp, ip, ".",
+                                               1, ip->i_ino, &first, &flist,
+                                               nres);
+                               if (error)
+                                       do_error(
+       "can't make \".\" entry in dir ino %llu, createname error %d\n",
+                                               ino, error);
+
+                               libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+                               error = libxfs_bmap_finish(&tp, &flist, first,
+                                               &committed);
+                               ASSERT(error == 0);
+                               libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+                                               |XFS_TRANS_SYNC, 0);
+                       }
+               }
+
+               /*
+                * done with this directory, drop the inode
+                * reference we got from libxfs_iget() above
+                */
+               libxfs_iput(ip, 0);
+       }
+}
+
+/*
+ * look up the incore inode record covering ino and mark that
+ * inode as reached.  the record is expected to exist; assert
+ * that before it is dereferenced to compute the inode's offset
+ * within the record.
+ */
+static void
+mark_standalone_inode(xfs_mount_t *mp, xfs_ino_t ino)
+{
+       ino_tree_node_t         *irec;
+       int                     offset;
+
+       irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino),
+                       XFS_INO_TO_AGINO(mp, ino));
+
+       ASSERT(irec != NULL);
+
+       offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum;
+
+       add_inode_reached(irec, offset);
+}
+
+/*
+ * mark realtime bitmap and summary inodes as reached.
+ * quota inodes will be marked here as well, but only if
+ * fs_quotas is set and the superblock quota inode field
+ * is neither 0 nor NULLFSINO.
+ */
+void
+mark_standalone_inodes(xfs_mount_t *mp)
+{
+       mark_standalone_inode(mp, mp->m_sb.sb_rbmino);
+       mark_standalone_inode(mp, mp->m_sb.sb_rsumino);
+
+       if (fs_quotas)  {
+               if (mp->m_sb.sb_uquotino
+                               && mp->m_sb.sb_uquotino != NULLFSINO)
+                       mark_standalone_inode(mp, mp->m_sb.sb_uquotino);
+
+               if (mp->m_sb.sb_pquotino
+                               && mp->m_sb.sb_pquotino != NULLFSINO)
+                       mark_standalone_inode(mp, mp->m_sb.sb_pquotino);
+       }
+}
+
+/*
+ * phase 6 - check inode connectivity.
+ *
+ * in order:
+ *     - tear down the incore block map and extent data
+ *     - recreate the root directory and the realtime bitmap and
+ *       summary inodes if flagged (need_root_inode, need_rbmino,
+ *       need_rsumino); in no_modify mode only say what would be done
+ *     - unless in no_modify mode, refill the realtime inodes and
+ *       make the orphanage directory
+ *     - traverse the directory tree starting at the root inode
+ *     - traverse every confirmed directory inode that the first
+ *       traversal did not refcheck
+ *     - move (or, in no_modify mode, report) every in-use inode
+ *       that still isn't marked reached to the orphanage
+ */
+void
+phase6(xfs_mount_t *mp)
+{
+       xfs_ino_t               ino;
+       ino_tree_node_t         *irec;
+       dir_stack_t             stack;
+       int                     i;
+       int                     j;
+
+       bzero(&zerocr, sizeof(cred_t));
+
+       do_log("Phase 6 - check inode connectivity...\n");
+
+       if (!no_modify)
+               teardown_bmap_finish(mp);
+       else
+               teardown_bmap(mp);
+
+       incore_ext_teardown(mp);
+
+       add_ino_backptrs(mp);
+
+       /*
+        * verify existence of root directory - if we have to
+        * make one, it's ok for the incore data structs not to
+        * know about it since everything about it (and the other
+        * inodes in its chunk if a new chunk was created) are ok
+        */
+       if (need_root_inode)  {
+               if (!no_modify)  {
+                       do_warn("reinitializing root directory\n");
+                       mk_root_dir(mp);
+                       need_root_inode = 0;
+                       need_root_dotdot = 0;
+               } else  {
+                       do_warn("would reinitialize root directory\n");
+               }
+       }
+
+       if (need_rbmino)  {
+               if (!no_modify)  {
+                       do_warn("reinitializing realtime bitmap inode\n");
+                       mk_rbmino(mp);
+                       need_rbmino = 0;
+               } else  {
+                       do_warn("would reinitialize realtime bitmap inode\n");
+               }
+       }
+
+       if (need_rsumino)  {
+               if (!no_modify)  {
+                       do_warn("reinitializing realtime summary inode\n");
+                       mk_rsumino(mp);
+                       need_rsumino = 0;
+               } else  {
+                       do_warn("would reinitialize realtime summary inode\n");
+               }
+       }
+
+       if (!no_modify)  {
+               do_log(
+       "        - resetting contents of realtime bitmap and summary inodes\n");
+               if (fill_rbmino(mp))  {
+                       do_warn(
+                       "Warning:  realtime bitmap may be inconsistent\n");
+               }
+
+               /*
+                * a failure here concerns the summary inode, so
+                * say so rather than repeating the bitmap warning
+                */
+               if (fill_rsumino(mp))  {
+                       do_warn(
+                       "Warning:  realtime summary may be inconsistent\n");
+               }
+       }
+
+       /*
+        * make orphanage (it's guaranteed to not exist now)
+        */
+       if (!no_modify)  {
+               do_log("        - ensuring existence of %s directory\n",
+                       ORPHANAGE);
+               orphanage_ino = mk_orphanage(mp);
+       }
+
+       dir_stack_init(&stack);
+
+       mark_standalone_inodes(mp);
+
+       /*
+        * push root dir on stack, then go
+        */
+       if (!need_root_inode)  {
+               do_log("        - traversing filesystem starting at / ... \n");
+
+               push_dir(&stack, mp->m_sb.sb_rootino);
+               process_dirstack(mp, &stack);
+
+               do_log("        - traversal finished ... \n");
+       } else  {
+               /*
+                * need_root_inode is cleared above whenever we're
+                * allowed to modify, so this must be no modify mode
+                */
+               ASSERT(no_modify != 0);
+
+               do_log(
+"        - root inode lost, cannot make new one in no modify mode ... \n");
+               do_log(
+"        - skipping filesystem traversal from / ... \n");
+       }
+
+       do_log("        - traversing all unattached subtrees ... \n");
+
+       irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
+                               XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
+
+       /*
+        * we always have a root inode, even if it's free...
+        * if the root is free, forget it, lost+found is already gone
+        *
+        * NOTE(review): offset 0 presumes the root inode is the first
+        * inode in its incore record -- confirm.
+        */
+       if (is_inode_free(irec, 0) || !inode_isadir(irec, 0))  {
+               need_root_inode = 1;
+       }
+
+       /*
+        * then process all unreached inodes
+        * by walking incore inode tree
+        *
+        *      get next unreached directory inode # from
+        *              incore list
+        *      push inode on dir stack
+        *      call process_dirstack
+        */
+       for (i = 0; i < glob_agcount; i++)  {
+               for (irec = findfirst_inode_rec(i); irec != NULL;
+                               irec = next_ino_rec(irec))  {
+                       for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+                               if (!is_inode_confirmed(irec, j))
+                                       continue;
+                               /*
+                                * skip directories that have already been
+                                * processed, even if they haven't been
+                                * reached.  If they are reachable, we'll
+                                * pick them up when we process their parent.
+                                */
+                               ino = XFS_AGINO_TO_INO(mp, i,
+                                               j + irec->ino_startnum);
+                               if (inode_isadir(irec, j) &&
+                                               !is_inode_refchecked(ino,
+                                                       irec, j)) {
+                                       push_dir(&stack, ino);
+                                       process_dirstack(mp, &stack);
+                               }
+                       }
+               }
+       }
+
+       do_log("        - traversals finished ... \n");
+       do_log("        - moving disconnected inodes to lost+found ... \n");
+
+       /*
+        * move all disconnected inodes to the orphanage
+        */
+       for (i = 0; i < glob_agcount; i++)  {
+               for (irec = findfirst_inode_rec(i); irec != NULL;
+                               irec = next_ino_rec(irec))  {
+                       for (j = 0; j < XFS_INODES_PER_CHUNK; j++)  {
+                               ASSERT(is_inode_confirmed(irec, j));
+                               if (is_inode_free(irec, j))
+                                       continue;
+                               if (!is_inode_reached(irec, j)) {
+                                       ASSERT(inode_isadir(irec, j) ||
+                                               num_inode_references(irec, j)
+                                               == 0);
+                                       ino = XFS_AGINO_TO_INO(mp, i,
+                                               j + irec->ino_startnum);
+                                       if (inode_isadir(irec, j))
+                                               do_warn(
+                                               "disconnected dir inode %llu, ",
+                                                       ino);
+                                       else
+                                               do_warn(
+                                               "disconnected inode %llu, ",
+                                                       ino);
+                                       if (!no_modify)  {
+                                               do_warn("moving to %s\n",
+                                                       ORPHANAGE);
+                                               mv_orphanage(mp, orphanage_ino,
+                                                       ino,
+                                                       inode_isadir(irec, j));
+                                       } else  {
+                                               do_warn("would move to %s\n",
+                                                       ORPHANAGE);
+                                       }
+                                       /*
+                                        * for read-only case, even though
+                                        * the inode isn't really reachable,
+                                        * set the flag (and bump our link
+                                        * count) anyway to fool phase 7
+                                        */
+                                       add_inode_reached(irec, j);
+                               }
+                       }
+               }
+       }
+}
diff --git a/repair/phase7.c b/repair/phase7.c
new file mode 100644 (file)
index 0000000..670afee
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "versions.h"
+
+/*
+ * Bring the link count stored in an inode core into line with the
+ * number of references counted for that inode.
+ *
+ * dinoc is a pointer to the IN-CORE dinode core.  ino is only used in
+ * the messages.  When the stored count differs from nrefs and no_modify
+ * is clear, the core is updated and *dirty is set to 1; in no_modify
+ * mode the mismatch is only reported.  *dirty is never cleared here.
+ */
+void
+set_nlinks(xfs_dinode_core_t   *dinoc,
+               xfs_ino_t       ino,
+               __uint32_t      nrefs,
+               int             *dirty)
+{
+       /* counts already agree -- nothing to report, nothing to fix */
+       if (INT_GET(dinoc->di_nlink, ARCH_NOCONVERT) == nrefs)
+               return;
+
+       if (no_modify)  {
+               do_warn(
+                       "would have reset inode %llu nlinks from %d to %d\n",
+                               ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs);
+               return;
+       }
+
+       *dirty = 1;
+       do_warn("resetting inode %llu nlinks from %d to %d\n",
+                       ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs);
+
+       /* too many links for a version 1 inode: announce the conversion */
+       if (nrefs > XFS_MAXLINK_1)  {
+               ASSERT(fs_inode_nlink);
+               do_warn(
+"nlinks %d will overflow v1 ino, ino %llu will be converted to version 2\n",
+                       nrefs, ino);
+       }
+
+       INT_SET(dinoc->di_nlink, ARCH_NOCONVERT, nrefs);
+}
+
+/*
+ * Allocate a remove-type transaction and reserve log space for it.
+ * The block reservation is 10 when the filesystem may be modified and
+ * 0 in no_modify mode.
+ */
+static xfs_trans_t *
+phase7_trans_start(xfs_mount_t *mp)
+{
+       xfs_trans_t             *tp;
+       int                     error;
+
+       tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+
+       error = libxfs_trans_reserve(tp, (no_modify ? 0 : 10),
+               XFS_REMOVE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES,
+               XFS_REMOVE_LOG_COUNT);
+
+       ASSERT(error == 0);
+
+       return(tp);
+}
+
+/*
+ * Phase 7: compare the link count of every in-use inode against the
+ * reference count gathered in the incore inode records and, unless
+ * no_modify is set, rewrite the counts that disagree.
+ */
+void
+phase7(xfs_mount_t *mp)
+{
+       ino_tree_node_t         *irec;
+       xfs_inode_t             *ip;
+       xfs_trans_t             *tp;
+       xfs_ino_t               ino;
+       __uint32_t              nrefs;
+       int                     agno;
+       int                     slot;
+       int                     error;
+       int                     dirty;
+
+       if (no_modify)
+               printf("Phase 7 - verify link counts...\n");
+       else
+               printf("Phase 7 - verify and correct link counts...\n");
+
+       tp = phase7_trans_start(mp);
+
+       /*
+        * for each ag, look at each inode 1 at a time using the
+        * sim code.  if the number of links is bad, reset it,
+        * log the inode core, commit the transaction, and
+        * allocate a new transaction
+        */
+       for (agno = 0; agno < glob_agcount; agno++)  {
+               for (irec = findfirst_inode_rec(agno); irec != NULL;
+                               irec = next_ino_rec(irec))  {
+                       for (slot = 0; slot < XFS_INODES_PER_CHUNK; slot++)  {
+                               ASSERT(is_inode_confirmed(irec, slot));
+
+                               if (is_inode_free(irec, slot))
+                                       continue;
+
+                               ASSERT(no_modify ||
+                                               is_inode_reached(irec, slot));
+                               ASSERT(no_modify ||
+                                               is_inode_referenced(irec, slot));
+
+                               nrefs = num_inode_references(irec, slot);
+
+                               ino = XFS_AGINO_TO_INO(mp, agno,
+                                       irec->ino_startnum + slot);
+
+                               error = libxfs_trans_iget(mp, tp, ino, 0, &ip);
+
+                               /* read-only run: report and move on */
+                               if (error && no_modify)  {
+                                       do_warn(
+       "couldn't map inode %llu, err = %d, can't compare link counts\n",
+                                               ino, error);
+                                       continue;
+                               }
+                               if (error)
+                                       do_error(
+                                       "couldn't map inode %llu, err = %d\n",
+                                               ino, error);
+
+                               dirty = 0;
+
+                               /*
+                                * compare and set links for all inodes
+                                * but the lost+found inode.  we keep
+                                * that correct as we go.
+                                */
+                               if (ino != orphanage_ino)
+                                       set_nlinks(&ip->i_d, ino, nrefs,
+                                                       &dirty);
+
+                               /* untouched inode: just release it */
+                               if (!dirty)  {
+                                       libxfs_trans_iput(tp, ip, 0);
+                                       continue;
+                               }
+
+                               libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+                               /*
+                                * no need to do a bmap finish since
+                                * we're not allocating anything
+                                */
+                               ASSERT(error == 0);
+                               error = libxfs_trans_commit(tp,
+                                       XFS_TRANS_RELEASE_LOG_RES|
+                                       XFS_TRANS_SYNC, NULL);
+
+                               ASSERT(error == 0);
+
+                               /* the commit used up tp; start a fresh one */
+                               tp = phase7_trans_start(mp);
+                       }
+               }
+       }
+
+       /*
+        * always have one unfinished transaction coming out
+        * of the loop.  cancel it.
+        */
+       libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+}
diff --git a/repair/protos.h b/repair/protos.h
new file mode 100644 (file)
index 0000000..4f3f8d5
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+void   xfs_init(libxfs_init_t *args);
+void   io_init(void);
+
+int    verify_sb(xfs_sb_t              *sb,
+               int                     is_primary_sb);
+int    verify_set_primary_sb(xfs_sb_t  *root_sb,
+                       int             sb_index,
+                       int             *sb_modified);
+int    get_sb(xfs_sb_t                 *sbp,
+               xfs_off_t                       off,
+               int                     size,
+               xfs_agnumber_t          agno);
+void   write_primary_sb(xfs_sb_t       *sbp,
+                       int             size);
+
+int    find_secondary_sb(xfs_sb_t      *sb);
+
+int    check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum);
+
+void   get_sb_geometry(fs_geometry_t   *geo,
+                       xfs_sb_t        *sbp);
+
+char   *alloc_ag_buf(int size);
+
+void   print_inode_list(xfs_agnumber_t i);
+char * err_string(int err_code);
+
diff --git a/repair/rt.c b/repair/rt.c
new file mode 100644 (file)
index 0000000..ac4e8c3
--- /dev/null
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "dinode.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "rt.h"
+
+#define xfs_highbit64 libxfs_highbit64 /* for XFS_RTBLOCKLOG macro */
+
+/*
+ * Allocate the zero-filled incore realtime bitmap (btmcompute) and
+ * realtime summary (sumcompute) buffers.  Nothing is allocated when the
+ * superblock reports no realtime blocks.
+ */
+void
+rtinit(xfs_mount_t *mp)
+{
+       if (mp->m_sb.sb_rblocks == 0)
+               return;
+
+       /*
+        * realtime init -- blockmap initialization is
+        * handled by incore_init().  no buffer is set up here
+        * for a copy of the summary file (sumfile).
+        */
+       btmcompute = calloc(mp->m_sb.sb_rbmblocks *
+                       mp->m_sb.sb_blocksize, 1);
+       if (btmcompute == NULL)
+               do_error(
+               "couldn't allocate memory for incore realtime bitmap.\n");
+
+       sumcompute = calloc(mp->m_rsumsize, 1);
+       if (sumcompute == NULL)
+               do_error(
+               "couldn't allocate memory for incore realtime summary info.\n");
+}
+
+/*
+ * account for one run of free realtime extents, [start_ext, end_ext):
+ * bump the summary counter indexed by XFS_RTBLOCKLOG of the run length
+ * and the bitmap block in which the run started.
+ */
+static void
+rtsum_count_run(xfs_mount_t    *mp,
+               xfs_suminfo_t   *sumcompute,
+               xfs_drtbno_t    start_ext,
+               xfs_drtbno_t    end_ext,
+               int             start_bmbno)
+{
+       int             len;
+       int             log;
+       int             offs;
+
+       len = (int) (end_ext - start_ext);
+       log = XFS_RTBLOCKLOG(len);
+       offs = XFS_SUMOFFS(mp, log, start_bmbno);
+       sumcompute[offs]++;
+}
+
+/*
+ * generate the real-time bitmap and summary info based on the
+ * incore realtime extent map.
+ *
+ * words receives one bitmap word per sizeof(xfs_rtword_t) * NBBY
+ * extents, with a bit set for every extent whose incore state is
+ * XR_E_FREE; sb_frextents is incremented for each such extent and
+ * sumcompute is updated once per run of free extents.  always
+ * returns 0.
+ */
+int
+generate_rtinfo(xfs_mount_t    *mp,
+               xfs_rtword_t    *words,
+               xfs_suminfo_t   *sumcompute)
+{
+       xfs_drtbno_t    extno = 0;
+       xfs_drtbno_t    run_start = 0;
+       xfs_rtword_t    mask;
+       xfs_rtword_t    bits;
+       int             bitsperblock;
+       int             bmbno = 0;
+       int             run_bmbno = 0;
+       int             in_run = 0;
+       int             i;
+
+       ASSERT(mp->m_rbmip == NULL);
+
+       bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+
+       /*
+        * slower but simple, don't play around with trying to set
+        * things one word at a time, just set bit as required.
+        * Have to track start and end (size) of each range of
+        * free extents to set the summary info properly.
+        */
+       while (extno < mp->m_sb.sb_rextents)  {
+               *words = 0;
+               bits = 0;
+               mask = 1;
+               for (i = 0; i < sizeof(xfs_rtword_t) * NBBY &&
+                               extno < mp->m_sb.sb_rextents;
+                               i++, extno++, mask <<= 1)  {
+                       if (get_rtbno_state(mp, extno) == XR_E_FREE)  {
+                               sb_frextents++;
+                               bits |= mask;
+
+                               /* first free extent of a new run */
+                               if (!in_run) {
+                                       run_start = extno;
+                                       run_bmbno = bmbno;
+                                       in_run = 1;
+                               }
+                       } else if (in_run) {
+                               /* run ended just before this extent */
+                               rtsum_count_run(mp, sumcompute, run_start,
+                                               extno, run_bmbno);
+                               in_run = 0;
+                       }
+               }
+               *words++ = bits;
+
+               /* crossed into the next bitmap block */
+               if (extno % bitsperblock == 0)
+                       bmbno++;
+       }
+
+       /* a run that reaches the last extent is still open here */
+       if (in_run)
+               rtsum_count_run(mp, sumcompute, run_start, extno, run_bmbno);
+
+       return(0);
+}
+
+#if 0
+/*
+ * compare the computed summary (sumcompute) with the copy held in
+ * sumfile, entry by entry, warning about every entry that differs.
+ *
+ * returns 1 if bad, 0 if good
+ */
+int
+check_summary(xfs_mount_t *mp)
+{
+       xfs_suminfo_t   *csp = sumcompute;
+       xfs_suminfo_t   *fsp = sumfile;
+       xfs_drfsbno_t   bno;
+       int             log;
+       int             mismatch = 0;
+
+       /* both arrays are walked in step: m_rsumlevels x sb_rbmblocks */
+       for (log = 0; log < mp->m_rsumlevels; log++) {
+               for (bno = 0; bno < mp->m_sb.sb_rbmblocks; bno++) {
+                       if (*csp != *fsp) {
+                               do_warn(
+       "rt summary mismatch, size %d block %llu, file: %d, computed: %d\n",
+                                               log, bno, *fsp, *csp);
+                               mismatch = 1;
+                       }
+                       csp++;
+                       fsp++;
+               }
+       }
+
+       return(mismatch);
+}
+
+/*
+ * examine the real-time bitmap file and compute summary
+ * info off it.  Should probably be changed to compute
+ * the summary information off the incore computed bitmap
+ * instead of the realtime bitmap file
+ *
+ * walks the bitmap inode's blocks (looked up through blkmap), marks
+ * every extent whose bit is set as XR_E_FREE, counts it in
+ * sb_frextents, and bumps the sumcompute entry for each run of
+ * consecutive set bits.  a block that is missing from the map or
+ * cannot be read is warned about and skipped; "error" is set in that
+ * case but nothing in this function reads it afterwards.
+ *
+ * NOTE(review): start_bmbno and start_bit are assigned only when a
+ * set bit starts a run, which is also the only place prevbit
+ * becomes 1.
+ *
+ * NOTE(review): the length of a run still open after the loops is
+ * computed from bmbno and bit as the loops left them.  when the outer
+ * loop finishes without taking the break, bmbno has already advanced
+ * to end_bmbno, which looks like it adds one block's worth of bits to
+ * that length -- confirm before re-enabling this code.
+ */
+void
+process_rtbitmap(xfs_mount_t   *mp,
+               xfs_dinode_t    *dino,
+               blkmap_t        *blkmap)
+{
+       int             error;
+       int             bit;
+       int             bitsperblock;
+       int             bmbno;
+       int             end_bmbno;
+       xfs_dfsbno_t    bno;
+       xfs_buf_t       *bp;
+       xfs_drtbno_t    extno;
+       int             i;
+       int             len;
+       int             log;
+       int             offs;
+       int             prevbit;
+       int             start_bmbno;
+       int             start_bit;
+       xfs_rtword_t    *words;
+
+       ASSERT(mp->m_rbmip == NULL);
+
+       bitsperblock = mp->m_sb.sb_blocksize * NBBY;
+       prevbit = 0;    /* 1 while inside a run of set bits */
+       extno = 0;
+       error = 0;
+
+       end_bmbno = howmany(INT_GET(dino->di_core.di_size, ARCH_CONVERT), mp->m_sb.sb_blocksize);
+
+       for (bmbno = 0; bmbno < end_bmbno; bmbno++) {
+               bno = blkmap_get(blkmap, bmbno);
+
+               if (bno == NULLDFSBNO) {
+                       do_warn("can't find block %d for rtbitmap inode\n",
+                                       bmbno);
+                       error = 1;
+                       continue;       /* extno is not advanced for this block */
+               }
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+                               XFS_FSB_TO_BB(mp, 1));
+               if (!bp) {
+                       do_warn("can't read block %d for rtbitmap inode\n",
+                                       bmbno);
+                       error = 1;
+                       continue;       /* extno is not advanced for this block */
+               }
+               words = (xfs_rtword_t *)bp->b_un.b_addr;
+               for (bit = 0;
+                    bit < bitsperblock && extno < mp->m_sb.sb_rextents;
+                    bit++, extno++) {
+                       if (isset(words, bit)) {
+                               set_rtbno_state(mp, extno, XR_E_FREE);
+                               sb_frextents++;
+                               if (prevbit == 0) {
+                                       start_bmbno = bmbno;
+                                       start_bit = bit;
+                                       prevbit = 1;
+                               }
+                       } else if (prevbit == 1) {
+                               len = (bmbno - start_bmbno) * bitsperblock +
+                                       (bit - start_bit);
+                               log = XFS_RTBLOCKLOG(len);
+                               offs = XFS_SUMOFFS(mp, log, start_bmbno);
+                               sumcompute[offs]++;
+                               prevbit = 0;
+                       }
+               }
+               libxfs_putbuf(bp);
+               if (extno == mp->m_sb.sb_rextents)      /* every rt extent has been seen */
+                       break;
+       }
+       if (prevbit == 1) {     /* a run was still open when scanning stopped */
+               len = (bmbno - start_bmbno) * bitsperblock + (bit - start_bit);
+               log = XFS_RTBLOCKLOG(len);
+               offs = XFS_SUMOFFS(mp, log, start_bmbno);
+               sumcompute[offs]++;
+       }
+}
+
+/*
+ * copy the real-time summary file data into memory
+ *
+ * for every block index below blkmap->count, the block is looked up
+ * through blkmap, read, and copied into sumfile at that index times
+ * sb_blocksize.  blocks that are missing from the map or cannot be
+ * read are warned about and skipped.  dino is not used.
+ *
+ * NOTE(review): "error" is not declared in this function; it has to
+ * come from file or global scope for this to compile once the
+ * surrounding "#if 0" is removed -- confirm.
+ * NOTE(review): the disabled prototype in rt.h takes only (mp, blkmap)
+ * while this definition takes three parameters -- reconcile before
+ * re-enabling.
+ */
+void
+process_rtsummary(xfs_mount_t  *mp,
+               xfs_dinode_t    *dino,
+               blkmap_t        *blkmap)
+{
+       xfs_fsblock_t   bno;
+       xfs_buf_t       *bp;
+       char            *bytes;
+       int             sumbno;
+
+       for (sumbno = 0; sumbno < blkmap->count; sumbno++) {
+               bno = blkmap_get(blkmap, sumbno);
+               if (bno == NULLDFSBNO) {
+                       do_warn("block %d for rtsummary inode is missing\n",
+                                       sumbno);
+                       error++;
+                       continue;
+               }
+               bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+                               XFS_FSB_TO_BB(mp, 1));
+               if (!bp) {
+                       do_warn("can't read block %d for rtsummary inode\n",
+                                       sumbno);
+                       error++;
+                       continue;
+               }
+               bytes = bp->b_un.b_addr;
+               bcopy(bytes, (char *)sumfile + sumbno * mp->m_sb.sb_blocksize,
+                       mp->m_sb.sb_blocksize);  /* one full block per index */
+               libxfs_putbuf(bp);
+       }
+}
+#endif
diff --git a/repair/rt.h b/repair/rt.h
new file mode 100644 (file)
index 0000000..d29241d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct blkmap;
+
+void
+rtinit(xfs_mount_t             *mp);
+
+int
+generate_rtinfo(xfs_mount_t    *mp,
+               xfs_rtword_t    *words,
+               xfs_suminfo_t   *sumcompute);
+
+#if 0
+
+int
+check_summary(xfs_mount_t      *mp);
+
+void
+process_rtbitmap(xfs_mount_t   *mp,
+               xfs_dinode_t    *dino,
+               struct blkmap   *blkmap);
+
+void
+process_rtsummary(xfs_mount_t  *mp,
+               struct blkmap   *blkmap);
+#endif
diff --git a/repair/sb.c b/repair/sb.c
new file mode 100644 (file)
index 0000000..5133f20
--- /dev/null
@@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <errno.h>
+#include <libxfs.h>
+#include <malloc.h>
+#include "agheader.h"
+#include "globals.h"
+#include "protos.h"
+#include "err_protos.h"
+
+
+/*
+ * overwrite dest with source, then put back the fields dest is meant
+ * to keep for itself: the root, realtime bitmap, realtime summary and
+ * quota inode numbers and the version number.  the data-alignment and
+ * extent-flag version bits are carried over from source, and the four
+ * summary counters in dest are cleared.
+ */
+void
+copy_sb(xfs_sb_t *source, xfs_sb_t *dest)
+{
+       xfs_sb_t        keep;
+
+       /* snapshot dest before it is overwritten */
+       keep = *dest;
+
+       *dest = *source;
+
+       dest->sb_rootino = keep.sb_rootino;
+       dest->sb_rbmino = keep.sb_rbmino;
+       dest->sb_rsumino = keep.sb_rsumino;
+       dest->sb_uquotino = keep.sb_uquotino;
+       dest->sb_pquotino = keep.sb_pquotino;
+
+       dest->sb_versionnum = keep.sb_versionnum;
+
+       /*
+        * copy over version bits that are stamped into all
+        * secondaries and cannot be changed at run time in
+        * the primary superblock
+        */
+       if (XFS_SB_VERSION_HASDALIGN(source))
+               XFS_SB_VERSION_ADDDALIGN(dest);
+       if (XFS_SB_VERSION_HASEXTFLGBIT(source))
+               XFS_SB_VERSION_ADDEXTFLGBIT(dest);
+
+       /*
+        * these are all supposed to be zero or will get reset anyway
+        */
+       dest->sb_icount = 0;
+       dest->sb_ifree = 0;
+       dest->sb_fdblocks = 0;
+       dest->sb_frextents = 0;
+
+       /*
+        * NOTE(review): this clears the name in source, not in dest --
+        * confirm that is the intended target.
+        */
+       bzero(source->sb_fname, 12);
+}
+
+#define BSIZE  (1024 * 1024)
+
+/*
+ * find a secondary superblock, copy it into the sb buffer
+ *
+ * scans the device from byte offset XFS_AG_MIN_BYTES onward, reading
+ * up to BSIZE bytes at a time and testing every BBSIZE-byte boundary
+ * of what was read with verify_sb().  each candidate is copied into
+ * rsb and cross-checked with verify_set_primary_sb().
+ *
+ * returns 1 once a candidate has been verified (it is then in rsb),
+ * or 0 if the scan stopped -- seek failure, read error or end of
+ * device -- without one.  rsb may hold an unverified candidate when 0
+ * is returned.
+ */
+int
+find_secondary_sb(xfs_sb_t *rsb)
+{
+       xfs_off_t       off;
+       xfs_sb_t        *sb;
+       xfs_sb_t        bufsb;
+       char            *c_bufsb;
+       int             done;
+       int             i;
+       int             dirty;
+       int             retval;
+       int             bsize;
+
+       do_warn("\nattempting to find secondary superblock...\n");
+
+       sb = (xfs_sb_t *) memalign(MEM_ALIGN, BSIZE);
+       if (!sb) {
+               do_error(
+       "error finding secondary superblock -- failed to memalign buffer\n");
+               exit(1);
+       }
+
+       bzero(&bufsb, sizeof(xfs_sb_t));
+       retval = 0;
+       dirty = 0;
+       bsize = 0;
+
+       /*
+        * skip first sector since we know that's bad
+        */
+       for (done = 0, off = XFS_AG_MIN_BYTES; !done ; off += bsize)  {
+               /*
+                * read disk 1 MByte at a time.
+                */
+               if (lseek64(fs_fd, off, SEEK_SET) != off)  {
+                       done = 1;
+               }
+
+               /*
+                * stop at end of device and on read errors alike.  a
+                * negative count from read() must not be used to step
+                * "off", or the scan would move backwards forever.
+                */
+               if (!done && (bsize = read(fs_fd, sb, BSIZE)) <= 0)  {
+                       done = 1;
+               }
+
+               do_warn(".");
+
+               /*
+                * check the buffer 512 bytes at a time since
+                * we don't know how big the sectors really are.
+                */
+               for (i = 0; !done && i < bsize; i += BBSIZE)  {
+                       c_bufsb = (char *) sb + i;
+                       libxfs_xlate_sb(c_bufsb, &bufsb, 1, ARCH_CONVERT,
+                               XFS_SB_ALL_BITS);
+
+                       if (verify_sb(&bufsb, 0) != XR_OK)
+                               continue;
+
+                       do_warn("found candidate secondary superblock...\n");
+
+                       /*
+                        * found one.  now verify it by looking
+                        * for other secondaries.
+                        *
+                        * bufsb is a single xfs_sb_t on the stack, so
+                        * copy exactly that much; copying sb_sectsize
+                        * bytes would read past the end of it.
+                        */
+                       bcopy(&bufsb, rsb, sizeof(xfs_sb_t));
+                       rsb->sb_inprogress = 0;
+                       clear_sunit = 1;
+
+                       if (verify_set_primary_sb(rsb, 0, &dirty) == XR_OK)  {
+                               do_warn("verified secondary superblock...\n");
+                               done = 1;
+                               retval = 1;
+                       } else  {
+                               do_warn(
+                               "unable to verify superblock, continuing...\n");
+                       }
+               }
+       }
+
+       free(sb);
+       return(retval);
+}
+
+/*
+ * calculate what inode alignment field ought to be
+ * based on internal superblock info
+ *
+ * the result is XFS_INODE_BIG_CLUSTER_SIZE shifted right by
+ * sb_blocklog.
+ */
+int
+calc_ino_align(xfs_sb_t *sb)
+{
+       return((int) (xfs_extlen_t)
+               (XFS_INODE_BIG_CLUSTER_SIZE >> sb->sb_blocklog));
+}
+
+/*
+ * verify a superblock -- does not verify root inode #
+ *     can only check that geometry info is internally
+ *     consistent.  because of growfs, that's no guarantee
+ *     of correctness (e.g. geometry may have changed)
+ *
+ * fields verified or consistency checked:
+ *
+ *                     sb_magicnum
+ *
+ *                     sb_versionnum
+ *
+ *                     sb_inprogress
+ *
+ *                     sb_blocksize    (as a group)
+ *                     sb_blocklog
+ *
+ * geometry info -     sb_dblocks      (as a group)
+ *                     sb_agcount
+ *                     sb_agblocks
+ *                     sb_agblklog
+ *
+ * inode info -                sb_inodesize    (x-checked with geo info)
+ *                     sb_inopblock
+ *
+ * sector size info -
+ *                     sb_sectsize
+ *                     sb_sectlog
+ *
+ * not checked here -
+ *                     sb_rootino
+ *                     sb_fname
+ *                     sb_fpack
+ *                     sb_logstart
+ *                     sb_uuid
+ *
+ *                     ALL real-time fields
+ *                     final 4 summary counters
+ */
+
+int
+verify_sb(xfs_sb_t *sb, int is_primary_sb)
+{
+       __uint32_t      bsize;
+       xfs_extlen_t    align;
+       int             i;
+       
+       /* check magic number and version number */
+
+       if (sb->sb_magicnum != XFS_SB_MAGIC)
+               return(XR_BAD_MAGIC);
+
+       if (!XFS_SB_GOOD_VERSION(sb))
+               return(XR_BAD_VERSION);
+
+       /* does sb think mkfs really finished ? */
+
+       if (is_primary_sb && sb->sb_inprogress == 1)
+               return(XR_BAD_INPROGRESS);
+
+       /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */
+
+       if (sb->sb_blocksize == 0)
+               return(XR_BAD_BLOCKSIZE);
+
+       bsize = 1;
+
+       for (i = 0; bsize < sb->sb_blocksize && i < 32; i++)  {
+               bsize <<= 1;
+       }
+
+       if (i < XR_LOG2BSIZE_MIN || i > XR_LOG2BSIZE_MAX)
+               return(XR_BAD_BLOCKSIZE);
+
+       /* check sb blocksize field against sb blocklog field */
+
+       if (i != sb->sb_blocklog)
+               return(XR_BAD_BLOCKLOG);
+
+       /* sanity check ag count, size fields against data size field */
+
+       if (sb->sb_dblocks == 0 ||
+               sb->sb_dblocks > sb->sb_agcount * sb->sb_agblocks ||
+               sb->sb_dblocks < (sb->sb_agcount - 1)
+                       * sb->sb_agblocks + XFS_MIN_AG_BLOCKS)
+               return(XR_BAD_FS_SIZE_DATA);
+
+       if (sb->sb_agblklog != (__uint8_t)libxfs_log2_roundup(sb->sb_agblocks))
+               return(XR_BAD_FS_SIZE_DATA);
+
+       if (sb->sb_inodesize < XFS_DINODE_MIN_SIZE ||
+               sb->sb_inodesize > XFS_DINODE_MAX_SIZE ||
+               sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize))
+               return(XR_BAD_INO_SIZE_DATA);
+
+       /* check sector size against log(sector size) field */
+
+       bsize = 1;
+
+       for (i = 0; bsize < sb->sb_sectsize && i < 15; i++)  {
+               bsize <<= 1;
+       }
+
+       if (sb->sb_sectsize == 0 || i == 16 ||
+                       sb->sb_sectsize != (1 << i))
+               return(XR_BAD_SECT_SIZE_DATA);
+
+       /*
+        * real-time extent size is always set
+        */
+       if (sb->sb_rextsize * sb->sb_blocksize > XFS_MAX_RTEXTSIZE)
+               return(XR_BAD_RT_GEO_DATA);
+
+       if (sb->sb_rextsize * sb->sb_blocksize < XFS_MIN_RTEXTSIZE)
+                       return(XR_BAD_RT_GEO_DATA);
+
+       if (sb->sb_rblocks == 0)  {
+               if (sb->sb_rextents != 0)
+                       return(XR_BAD_RT_GEO_DATA);
+
+               if (sb->sb_rbmblocks != 0)
+                       return(XR_BAD_RT_GEO_DATA);
+
+               if (sb->sb_rextslog != 0)
+                       return(XR_BAD_RT_GEO_DATA);
+
+               if (sb->sb_frextents != 0)
+                       return(XR_BAD_RT_GEO_DATA);
+       } else  {
+               /*
+                * if we have a real-time partition, sanity-check geometry
+                */
+               if (sb->sb_rblocks / sb->sb_rextsize != sb->sb_rextents)
+                       return(XR_BAD_RT_GEO_DATA);
+
+               if (sb->sb_rextslog !=
+                               libxfs_highbit32((unsigned int)sb->sb_rextents))
+                       return(XR_BAD_RT_GEO_DATA);
+
+               if (sb->sb_rbmblocks != (xfs_extlen_t) howmany(sb->sb_rextents,
+                                               NBBY * sb->sb_blocksize))
+                       return(XR_BAD_RT_GEO_DATA);
+       }
+
+       /*
+        * verify correctness of inode alignment if it's there
+        */
+       if (XFS_SB_VERSION_HASALIGN(sb))  {
+               align = calc_ino_align(sb);
+
+               if (align != sb->sb_inoalignmt)
+                       return(XR_BAD_INO_ALIGN);
+       }
+
+       /*
+        * verify max. % of inodes (sb_imax_pct)
+        */
+       if (sb->sb_imax_pct > 100)
+               return(XR_BAD_INO_MAX_PCT);
+
+       /*
+        * verify stripe alignment fields if present
+        */
+       if (XFS_SB_VERSION_HASDALIGN(sb)) {
+               if ((!sb->sb_unit && sb->sb_width) || 
+                   (sb->sb_unit && sb->sb_agblocks % sb->sb_unit)) 
+                       return(XR_BAD_SB_UNIT);
+               if ((sb->sb_unit && !sb->sb_width) ||
+                   (sb->sb_width && sb->sb_unit && sb->sb_width % sb->sb_unit))
+                       return(XR_BAD_SB_WIDTH);
+       }
+
+       /*
+        * if shared bit is set, verify that the version number is sane
+        */
+       if (XFS_SB_VERSION_HASSHARED(sb))  {
+               if (sb->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
+                       return(XR_BAD_SVN);
+       }
+
+       /*
+        * mkfs's that stamped a feature bit besides the ones in the
+        * mask below could leave garbage in the secondary superblock
+        * sectors.  Anything stamping the shared fs bit or better into
+        * the secondaries is ok and should generate clean secondary
+        * superblock sectors.
+        *
+        * check primary and clean secondary superblocks more strictly
+        */
+       if (is_primary_sb || sb->sb_versionnum & XR_PART_SECSB_VNMASK)  {
+               /*
+                * return errors if shared vn or alignment fields
+                * are set without their feature bits being set
+                */
+               if (!pre_65_beta && sb->sb_versionnum & XR_PART_SECSB_VNMASK ||
+                   pre_65_beta && sb->sb_versionnum & XR_ALPHA_SECSB_VNMASK)  {
+                       /*
+                        * shared version # and inode alignment fields
+                        * should be valid
+                        */
+                       if (sb->sb_shared_vn && !XFS_SB_VERSION_HASSHARED(sb))
+                               return(XR_BAD_SVN);
+                       if (sb->sb_inoalignmt && !XFS_SB_VERSION_HASALIGN(sb))
+                               return(XR_BAD_INO_ALIGN);
+               }
+               if ((!pre_65_beta &&
+                    (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK)) ||
+                   (pre_65_beta &&
+                    (sb->sb_versionnum & XFS_SB_VERSION_DALIGNBIT)))  {
+                       /*
+                        * stripe alignment values should be valid
+                        */
+                       if (sb->sb_unit && !XFS_SB_VERSION_HASDALIGN(sb))
+                               return(XR_BAD_SB_UNIT);
+                       if (sb->sb_width && !XFS_SB_VERSION_HASDALIGN(sb))
+                               return(XR_BAD_SB_WIDTH);
+               }
+
+#if 0
+               /*
+                * checks involving later superblock fields get added here...
+                */
+               if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK)  {
+               }
+#endif
+       }
+
+       return(XR_OK);
+}
+
+/*
+ * Write the superblock *sbp to offset 0 of fs_fd as the primary superblock.
+ *
+ * sbp  - superblock to write; it is passed through libxfs_xlate_sb()
+ *        into a zeroed scratch buffer first (presumably the in-core to
+ *        on-disk conversion -- confirm against libxfs)
+ * size - number of bytes to allocate and write
+ *
+ * Does nothing when no_modify is set.  Every failure is reported through
+ * do_error(); each failure path returns right after reporting so that the
+ * already-freed buffer can never be touched again should do_error() return.
+ */
+void
+write_primary_sb(xfs_sb_t *sbp, int size)
+{
+       void    *buf;
+
+       if (no_modify)
+               return;
+
+       if ((buf = calloc(size, 1)) == NULL) {
+               do_error("failed to malloc superblock buffer\n");
+               return;
+       }
+
+       if (lseek64(fs_fd, 0LL, SEEK_SET) != 0LL) {
+               free(buf);
+               do_error("couldn't seek to offset 0 in filesystem\n");
+               /* buf is gone -- do not fall through and use it */
+               return;
+       }
+
+       libxfs_xlate_sb(buf, sbp, -1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+
+       if (write(fs_fd, buf, size) != size) {
+               free(buf);
+               do_error("primary superblock write failed!\n");
+               /* avoid the second free() below */
+               return;
+       }
+
+       free(buf);
+}
+
+/*
+ * get a possible superblock
+ *
+ * Reads size bytes at byte offset off of fs_fd, runs them through
+ * libxfs_xlate_sb() into *sbp and returns the result of
+ * verify_sb(sbp, 0).
+ *
+ * sbp  - destination for the translated superblock
+ * off  - byte offset in the filesystem to read from
+ * size - number of bytes to read
+ * agno - allocation group number, used only in diagnostics
+ *
+ * Returns XR_EOF if the seek fails, otherwise whatever verify_sb() returns.
+ * Allocation and read failures are reported through do_error().
+ */
+int
+get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno)
+{
+       int error, rval;
+       void *buf;
+
+       if ((buf = calloc(size, 1)) == NULL) {
+               do_error(
+       "error reading superblock %u -- failed to malloc buffer\n",
+                       agno);
+               exit(1);
+       }
+
+       /* try and read it first */
+
+       if (lseek64(fs_fd, off, SEEK_SET) != off)  {
+               do_warn(
+       "error reading superblock %u -- seek to offset %lld failed\n",
+                       agno, off);
+               /* don't leak the scratch buffer on the early return */
+               free(buf);
+               return(XR_EOF);
+       }
+
+       if ((rval = read(fs_fd, buf, size)) != size)  {
+               error = errno;
+               /* argument order must follow the format: ag first, then rval */
+               do_warn(
+"superblock read failed, offset %lld, size %d, ag %u, rval %d\n",
+                       off, size, agno, rval);
+               do_error("%s\n", strerror(error));
+       }
+       libxfs_xlate_sb(buf, sbp, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+       free(buf);
+
+       return (verify_sb(sbp, 0));
+}
+
+#if 0
+/*
+ * NOTE: this block is compiled out.
+ *
+ * Seeks to off and reads sbbuf_size bytes into sb_bufs[bufnum]; returns
+ * XR_EOF if the seek fails or the read comes up short.  The final
+ * get_sb() call passes three arguments (off, bufnum, agnum), which does
+ * not match the four-argument get_sb(sbp, off, size, agno) in this file,
+ * so the code would have to be updated before it could be re-enabled.
+ */
+int
+check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum)
+{
+       int rval;
+
+       ASSERT(bufnum < NUM_SBS);
+
+       /* try and read it first */
+
+       if (lseek64(fs_fd, off, SEEK_SET) != off)
+               return(XR_EOF);
+
+       if ((rval = read(fs_fd, sb_bufs[bufnum], sbbuf_size)) != sbbuf_size)  {
+               /*
+                * we didn't get a full block so the filesystem
+                * could not have been grown.  return a non-XR_OK
+                * result code.
+                */
+               return(XR_EOF);
+       }
+
+       return(get_sb(off, bufnum, agnum));
+}
+#endif
+/*
+ * Walk the geometry list and hand back the entry with the largest
+ * reference count.  On a tie the entry closest to the head wins.
+ * Returns NULL when the list is empty or no entry has a positive
+ * reference count.
+ */
+
+fs_geo_list_t *
+get_best_geo(fs_geo_list_t *list)
+{
+       fs_geo_list_t   *best = NULL;
+       fs_geo_list_t   *node;
+       int             best_refs = 0;
+
+       for (node = list; node != NULL; node = node->next)  {
+               if (node->refs <= best_refs)
+                       continue;
+
+               best = node;
+               best_refs = node->refs;
+       }
+
+       return(best);
+}
+
+/*
+ * Record a geometry on the list.
+ *
+ * If an entry with a byte-for-byte identical geometry already exists,
+ * its reference count is bumped and the list head is returned unchanged.
+ * Otherwise a new entry (refs == 1, tagged with index) is pushed on the
+ * front and becomes the new head, which is returned.
+ */
+
+fs_geo_list_t *
+add_geo(fs_geo_list_t *list, fs_geometry_t *geo_p, int index)
+{
+       fs_geo_list_t   *node;
+
+       /* an identical geometry that is already listed just gains a ref */
+       for (node = list; node != NULL; node = node->next)  {
+               if (memcmp(geo_p, &node->geo, sizeof(fs_geometry_t)) != 0)
+                       continue;
+
+               node->refs++;
+               return(list);
+       }
+
+       node = malloc(sizeof(fs_geo_list_t));
+       if (node == NULL)  {
+               do_error("couldn't malloc geometry structure\n");
+               exit(1);
+       }
+
+       node->geo = *geo_p;
+       node->refs = 1;
+       node->next = list;
+       node->index = index;
+
+       return(node);
+}
+
+/*
+ * Release every entry of a geometry list.  A NULL list is a no-op.
+ */
+void
+free_geo(fs_geo_list_t *list)
+{
+       fs_geo_list_t   *node = list;
+       fs_geo_list_t   *following;
+
+       while (node != NULL)  {
+               /* grab the link before the entry goes away */
+               following = node->next;
+               free(node);
+               node = following;
+       }
+}
+
+/*
+ * Fill *geo with the geometry described by superblock *sbp.
+ *
+ * *geo is zeroed first, so any field not explicitly copied below is 0.
+ * Which optional fields are copied depends on the feature bits in
+ * sb_versionnum and on the pre_65_beta setting.
+ */
+void
+get_sb_geometry(fs_geometry_t *geo, xfs_sb_t *sbp)
+{
+       bzero(geo, sizeof(fs_geometry_t));
+
+       /*
+        * fields that are copied unconditionally
+        */
+       geo->sb_blocksize = sbp->sb_blocksize;
+       geo->sb_dblocks = sbp->sb_dblocks;
+       geo->sb_rblocks = sbp->sb_rblocks;
+       geo->sb_rextents = sbp->sb_rextents;
+       geo->sb_logstart = sbp->sb_logstart;
+       geo->sb_rextsize = sbp->sb_rextsize;
+       geo->sb_agblocks = sbp->sb_agblocks;
+       geo->sb_agcount = sbp->sb_agcount;
+       geo->sb_rbmblocks = sbp->sb_rbmblocks;
+       geo->sb_logblocks = sbp->sb_logblocks;
+       geo->sb_sectsize = sbp->sb_sectsize;
+       geo->sb_inodesize = sbp->sb_inodesize;
+
+       /*
+        * one flag per feature bit
+        */
+       if (XFS_SB_VERSION_HASALIGN(sbp))  {
+               geo->sb_ialignbit = 1;
+       }
+
+       if (XFS_SB_VERSION_HASSHARED(sbp) ||
+           (sbp->sb_versionnum & XR_PART_SECSB_VNMASK))  {
+               geo->sb_sharedbit = 1;
+       }
+
+       if (XFS_SB_VERSION_HASDALIGN(sbp))  {
+               geo->sb_salignbit = 1;
+       }
+
+       if (XFS_SB_VERSION_HASEXTFLGBIT(sbp))  {
+               geo->sb_extflgbit = 1;
+       }
+
+       /*
+        * pre-6.5 mkfs could leave garbage in some secondary
+        * superblock fields.  trust such a field only when its own
+        * feature bit is set, or when the version bits show that a
+        * field located after it was properly initialized (which
+        * implies this one was too).
+        */
+
+       /*
+        * the inode alignment field lives before the data alignment field
+        */
+       if (pre_65_beta
+                       ? (sbp->sb_versionnum & XR_ALPHA_SECSB_VNMASK)
+                       : (sbp->sb_versionnum & XR_PART_SECSB_VNMASK))  {
+               geo->sb_inoalignmt = sbp->sb_inoalignmt;
+       }
+
+       if (pre_65_beta
+                       ? XFS_SB_VERSION_HASDALIGN(sbp)
+                       : (sbp->sb_versionnum & XR_GOOD_SECSB_VNMASK))  {
+               geo->sb_unit = sbp->sb_unit;
+               geo->sb_width = sbp->sb_width;
+       }
+
+       /*
+        * the shared version number is always set if either inode or
+        * data alignment is on, since that field lives between the
+        * quota and inode alignment fields
+        */
+       if (sbp->sb_versionnum & XR_PART_SECSB_VNMASK)  {
+               geo->sb_shared_vn = sbp->sb_shared_vn;
+       }
+
+       /*
+        * superblock fields located after the sb_width field get set
+        * into the geometry structure only if we can determine from
+        * the features enabled in this superblock whether or not the
+        * sector was bzero'd at mkfs time.
+        */
+       if (pre_65_beta
+                       ? (sbp->sb_versionnum & XR_ALPHA_SECSB_VNMASK)
+                       : (sbp->sb_versionnum & XR_GOOD_SECSB_VNMASK))  {
+               geo->sb_fully_zeroed = 1;
+       }
+}
+
+/*
+ * the way to verify that a primary sb is consistent with the
+ * filesystem is find the secondaries given the info in the
+ * primary and compare the geometries in the secondaries against
+ * the geometry indicated by the primary.
+ *
+ * rsb         - candidate primary superblock; overwritten (via copy_sb)
+ *               with the winning secondary if a different geometry wins
+ * sb_index    - AG number rsb was read from
+ * sb_modified - set to 1 if *rsb was replaced, 0 otherwise
+ *
+ * returns 0 if ok, 1 if a secondary could not be seeked to, and
+ * XR_INSUFF_SEC_SB if too few secondaries verified.  exits if the
+ * surviving geometries do not agree well enough (see force_geo).
+ */
+int
+verify_set_primary_sb(xfs_sb_t         *rsb,
+                       int             sb_index,
+                       int             *sb_modified)
+{
+       xfs_off_t               off;
+       fs_geometry_t   geo;
+       xfs_sb_t        *sb;
+       fs_geo_list_t   *list;
+       fs_geo_list_t   *current;
+       char            *checked;
+       xfs_agnumber_t  agno;
+       int             num_sbs;
+       int             skip;
+       int             size;
+       int             num_ok;
+       int             retval;
+       int             round;
+
+       /*
+        * select the number of secondaries to try for
+        *
+        * NOTE(review): num_sbs never exceeds sb_agcount, so if howmany()
+        * is the usual ceiling division this always yields skip == 1 and
+        * the single round below visits every AG.
+        * howmany(rsb->sb_agcount, num_sbs) may have been intended --
+        * confirm before changing, as num_ok is compared against num_sbs.
+        */
+       num_sbs = MIN(NUM_SBS, rsb->sb_agcount);
+       skip = howmany(num_sbs, rsb->sb_agcount);
+       size = NUM_AGH_SECTS * rsb->sb_sectsize;
+       retval = 0;
+       list = NULL;
+       num_ok = 0;
+       *sb_modified = 0;
+
+       sb = (xfs_sb_t *) alloc_ag_buf(size);
+       checked = calloc(rsb->sb_agcount, sizeof(char));
+       if (!checked) {
+               do_error("calloc failed in verify_set_primary_sb\n");
+               exit(1);
+       }
+
+       /*
+        * put the primary sb geometry info onto the geometry list
+        */
+       checked[sb_index] = 1;
+       get_sb_geometry(&geo, rsb);
+       list = add_geo(list, &geo, sb_index);
+
+       /*
+        * grab N secondaries.  check them off as we get them
+        * so we only process each one once
+        */
+       for (round = 0; round < skip; round++)  {
+               for (agno = round; agno < rsb->sb_agcount; agno += skip)  {
+                       if (checked[agno])
+                               continue;
+
+                       off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog;
+
+                       checked[agno] = 1;
+
+                       if (get_sb(sb, off, size, agno) == XR_EOF)  {
+                               retval = 1;
+                               goto out;
+                       }
+
+                       if (verify_sb(sb, 0) == XR_OK)  {
+                               /*
+                                * save away geometry info.
+                                * don't bother checking the sb
+                                * against the agi/agf as the odds
+                                * of the sb being corrupted in a way
+                                * that it is internally consistent
+                                * but not consistent with the rest
+                                * of the filesystem is really really low.
+                                */
+                               get_sb_geometry(&geo, sb);
+                               list = add_geo(list, &geo, agno);
+                               num_ok++;
+                       }
+               }
+       }
+
+       /*
+        * see if we have enough superblocks to bother with.
+        * leave through the common exit so nothing is leaked.
+        */
+       if (num_ok < num_sbs / 2)  {
+               retval = XR_INSUFF_SEC_SB;
+               goto out;
+       }
+
+       current = get_best_geo(list);
+
+       /*
+        * check that enough sbs agree that we're willing to
+        * go with this geometry.  if not, print out the
+        * geometry and a message about the force option.
+        */
+       switch (num_sbs)  {
+       case 2:
+               /*
+                * all them have to be right.  if not, report geometry
+                * and get out unless force option is in effect (-F)
+                */
+               if (current->refs != 2)  {
+                       if (!force_geo)  {
+                               do_warn("Only two AGs detected and they do not match - cannot proceed.\n");
+                               exit(1);
+                       }
+               }
+               break;
+       case 1:
+               /*
+                * just report the geometry info and get out.
+                * refuse to run further unless the force (-F)
+                * option is in effect.
+                */
+               if (!force_geo)  {
+                       do_warn("Only one AG detected - cannot proceed.\n");
+                       exit(1);
+               }
+               /* FALLTHROUGH */
+       default:
+               /*
+                * at least half of the probed superblocks have
+                * to agree.  if they don't, this fs is probably
+                * too far gone anyway considering the fact that
+                * XFS normally doesn't alter the secondary superblocks.
+                */
+               if (current->refs < num_sbs / 2)  {
+                       do_warn("Not enough matching superblocks - cannot proceed.\n");
+                       exit(1);
+               }
+       }
+
+       /*
+        * set the geometry into primary superblock if necessary.
+        */
+
+       if (current->index != sb_index)  {
+               *sb_modified = 1;
+               /*
+                * widen before multiplying: the operands are 32 bits
+                * wide and their product can exceed 32 bits
+                */
+               off = (xfs_off_t)current->index * current->geo.sb_agblocks
+                       * current->geo.sb_blocksize;
+               if (get_sb(sb, off, current->geo.sb_sectsize,
+                               current->index) != XR_OK)
+                       do_error("could not read superblock\n");
+
+               copy_sb(sb, rsb);
+
+               /*
+                * turn off inprogress bit since this is the primary.
+                * also save away values that we need to ensure are
+                * consistent in the other secondaries.
+                */
+               rsb->sb_inprogress = 0;
+               sb_inoalignmt = sb->sb_inoalignmt;
+               sb_unit = sb->sb_unit;
+               sb_width = sb->sb_width;
+       }
+
+out:
+       /* common exit: every path releases the list and both buffers */
+       free_geo(list);
+       free(sb);
+       free(checked);
+       return(retval);
+}
diff --git a/repair/scan.c b/repair/scan.c
new file mode 100644 (file)
index 0000000..e6228a2
--- /dev/null
@@ -0,0 +1,1279 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "scan.h"
+#include "versions.h"
+#include "bmap.h"
+
+extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
+               xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i);
+
+static xfs_mount_t     *mp = NULL;
+static xfs_extlen_t    bno_agffreeblks;
+static xfs_extlen_t    cnt_agffreeblks;
+static xfs_extlen_t    bno_agflongest;
+static xfs_extlen_t    cnt_agflongest;
+static xfs_agino_t     agicount;
+static xfs_agino_t     agifreecount;
+
+/*
+ * Record the mount structure that the scan routines in this file
+ * (e.g. scan_sbtree, scan_lbtree) use through the file-static mp.
+ */
+void
+set_mp(xfs_mount_t *mpp)
+{
+       mp = mpp;
+}
+
+/*
+ * Read one short-form btree block (AG agno, AG block root) and pass it
+ * to func along with level nlevels - 1.  The buffer is released once
+ * func returns.  A failed read is reported through do_error() and func
+ * is not called.
+ */
+void
+scan_sbtree(
+       xfs_agblock_t   root,
+       int             nlevels,
+       xfs_agnumber_t  agno,
+       int             suspect,
+       void            (*func)(xfs_btree_sblock_t      *block,
+                               int                     level,
+                               xfs_agblock_t           bno,
+                               xfs_agnumber_t          agno,
+                               int                     suspect,
+                               int                     isroot),
+       int             isroot)
+{
+       xfs_buf_t               *bp;
+       xfs_btree_sblock_t      *block;
+
+       bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root),
+                       XFS_FSB_TO_BB(mp, 1), 0);
+       if (bp == NULL)  {
+               do_error("can't read btree block %d/%d\n", agno, root);
+               return;
+       }
+
+       block = (xfs_btree_sblock_t *)XFS_BUF_PTR(bp);
+       func(block, nlevels - 1, root, agno, suspect, isroot);
+
+       libxfs_putbuf(bp);
+}
+
+/*
+ * Read one long-form btree block (filesystem block root) and pass it to
+ * func along with level nlevels - 1 and the remaining arguments.  If
+ * func flags the block dirty and no_modify is clear the buffer is
+ * written back, otherwise it is simply released.
+ *
+ * returns 1 on bad news (inode needs to be cleared), 0 on good;
+ * a failed read counts as bad news, otherwise the value is func's.
+ */
+int
+scan_lbtree(
+       xfs_dfsbno_t    root,
+       int             nlevels,
+       int             (*func)(xfs_btree_lblock_t      *block,
+                               int                     level,
+                               int                     type,
+                               int                     whichfork,
+                               xfs_dfsbno_t            bno,
+                               xfs_ino_t               ino,
+                               xfs_drfsbno_t           *tot,
+                               __uint64_t              *nex,
+                               blkmap_t                **blkmapp,
+                               bmap_cursor_t           *bm_cursor,
+                               int                     isroot,
+                               int                     check_dups,
+                               int                     *dirty),
+       int             type,
+       int             whichfork,
+       xfs_ino_t       ino,
+       xfs_drfsbno_t   *tot,
+       __uint64_t      *nex,
+       blkmap_t        **blkmapp,
+       bmap_cursor_t   *bm_cursor,
+       int             isroot,
+       int             check_dups)
+{
+       xfs_buf_t               *bp;
+       xfs_btree_lblock_t      *block;
+       int                     dirty = 0;
+       int                     err;
+
+       bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root),
+                     XFS_FSB_TO_BB(mp, 1), 0);
+       if (bp == NULL)  {
+               do_error("can't read btree block %d/%d\n",
+                       XFS_FSB_TO_AGNO(mp, root),
+                       XFS_FSB_TO_AGBNO(mp, root));
+               return(1);
+       }
+
+       block = (xfs_btree_lblock_t *)XFS_BUF_PTR(bp);
+       err = func(block, nlevels - 1, type, whichfork, root, ino,
+                       tot, nex, blkmapp, bm_cursor, isroot,
+                       check_dups, &dirty);
+
+       /* func must not dirty the block when modification is disabled */
+       ASSERT(dirty == 0 || dirty && !no_modify);
+
+       if (!dirty || no_modify)
+               libxfs_putbuf(bp);
+       else
+               libxfs_writebuf(bp, 0);
+
+       return(err);
+}
+
+int
+scanfunc_bmap(
+       xfs_btree_lblock_t      *ablock,
+       int                     level,
+       int                     type,
+       int                     whichfork,
+       xfs_dfsbno_t            bno,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       __uint64_t              *nex,
+       blkmap_t                **blkmapp,
+       bmap_cursor_t           *bm_cursor,
+       int                     isroot,
+       int                     check_dups,
+       int                     *dirty)
+{
+       xfs_bmbt_block_t        *block = (xfs_bmbt_block_t *)ablock;
+       int                     i;
+       int                     err;
+       xfs_bmbt_ptr_t          *pp;
+       xfs_bmbt_key_t          *pkey;
+       xfs_bmbt_rec_32_t       *rp;
+       xfs_dfiloff_t           first_key;
+       xfs_dfiloff_t           last_key;
+       char                    *forkname;
+
+       if (whichfork == XFS_DATA_FORK)
+               forkname = "data";
+       else
+               forkname = "attr";
+
+       /*
+        * unlike the ag freeblock btrees, if anything looks wrong 
+        * in an inode bmap tree, just bail.  it's possible that
+        * we'll miss a case where the to-be-toasted inode and
+        * another inode are claiming the same block but that's
+        * highly unlikely.
+        */
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) {
+               do_warn(
+               "bad magic # %#x in inode %llu (%s fork) bmbt block %llu\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), ino, forkname, bno);
+               return(1);
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               do_warn(
+       "expected level %d got %d in inode %llu, (%s fork) bmbt block %llu\n",
+                       level, INT_GET(block->bb_level, ARCH_CONVERT), ino, forkname, bno);
+               return(1);
+       }
+
+       if (check_dups == 0)  {
+               /*
+                * check sibling pointers. if bad we have a conflict
+                * between the sibling pointers and the child pointers
+                * in the parent block.  blow out the inode if that happens
+                */
+               if (bm_cursor->level[level].fsbno != NULLDFSBNO)  {
+                       /*
+                        * this is not the first block on this level
+                        * so the cursor for this level has recorded the
+                        * values for this's block left-sibling.
+                        */
+                       if (bno != bm_cursor->level[level].right_fsbno)  {
+                               do_warn(
+       "bad fwd (right) sibling pointer (saw %llu parent block says %llu)\n",
+                                       bm_cursor->level[level].right_fsbno,
+                                       bno);
+                               do_warn(
+               "\tin inode %llu (%s fork) bmap btree block %llu\n",
+                                       ino, forkname,
+                                       bm_cursor->level[level].fsbno);
+                               return(1);
+                       }
+                       if (INT_GET(block->bb_leftsib, ARCH_CONVERT) !=
+                                       bm_cursor->level[level].fsbno)  {
+                               do_warn(
+       "bad back (left) sibling pointer (saw %llu parent block says %llu)\n",
+                                       INT_GET(block->bb_leftsib, ARCH_CONVERT),
+                                       bm_cursor->level[level].fsbno);
+                               do_warn(
+               "\tin inode %llu (%s fork) bmap btree block %llu\n",
+                                       ino, forkname, bno);
+                               return(1);
+                       }
+               } else {
+                       /*
+                        * This is the first or only block on this level.
+                        * Check that the left sibling pointer is NULL
+                        */
+                       if (INT_GET(block->bb_leftsib, ARCH_CONVERT) !=
+                                       NULLDFSBNO)  {
+                               do_warn(
+       "bad back (left) sibling pointer (saw %llu should be NULL (0))\n",
+                               INT_GET(block->bb_leftsib, ARCH_CONVERT));
+                               do_warn(
+               "\tin inode %llu (%s fork) bmap btree block %llu\n",
+                                       ino, forkname, bno);
+                               return(1);
+                       }
+               }
+
+               /*
+                * update cursor block pointers to reflect this block
+                */
+               bm_cursor->level[level].fsbno = bno;
+               bm_cursor->level[level].left_fsbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
+               bm_cursor->level[level].right_fsbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+
+               switch (get_fsbno_state(mp, bno))  {
+               case XR_E_UNKNOWN:
+               case XR_E_FREE1:
+               case XR_E_FREE:
+                       set_fsbno_state(mp, bno, XR_E_INUSE);
+                       break;
+               case XR_E_FS_MAP:
+               case XR_E_INUSE:
+                       /*
+                        * we'll try and continue searching here since
+                        * the block looks like it's been claimed by file
+                        * to store user data, a directory to store directory
+                        * data, or the space allocation btrees but since
+                        * we made it here, the block probably
+                        * contains btree data.
+                        */
+                       set_fsbno_state(mp, bno, XR_E_MULT);
+                       do_warn(
+               "inode 0x%llx bmap block 0x%llx claimed, state is %d\n",
+                               ino, (__uint64_t) bno,
+                               get_fsbno_state(mp, bno));
+                       break;
+               case XR_E_MULT:
+               case XR_E_INUSE_FS:
+                       set_fsbno_state(mp, bno, XR_E_MULT);
+                       do_warn(
+               "inode 0x%llx bmap block 0x%llx claimed, state is %d\n",
+                               ino, (__uint64_t) bno,
+                               get_fsbno_state(mp, bno));
+                       /*
+                        * if we made it to here, this is probably a bmap block
+                        * that is being used by *another* file as a bmap block
+                        * so the block will be valid.  Both files should be
+                        * trashed along with any other file that impinges on
+                        * any blocks referenced by either file.  So we
+                        * continue searching down this btree to mark all
+                        * blocks duplicate
+                        */
+                       break;
+               case XR_E_BAD_STATE:
+               default:
+                       do_warn(
+               "bad state %d, inode 0x%llx bmap block 0x%llx\n",
+                               get_fsbno_state(mp, bno),
+                               ino, (__uint64_t) bno);
+                       break;
+               }
+       } else  {
+               /*
+                * attribute fork for realtime files is in the regular
+                * filesystem
+                */
+               if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
+                       if (search_dup_extent(mp, XFS_FSB_TO_AGNO(mp, bno),
+                                       XFS_FSB_TO_AGBNO(mp, bno)))
+                               return(1);
+               } else  {
+                       if (search_rt_dup_extent(mp, bno))
+                               return(1);
+               }
+       }
+       (*tot)++;
+       if (level == 0) {
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0] ||
+                   isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[0])  {
+do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n",
+                               ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                               mp->m_bmap_dmnr[0], mp->m_bmap_dmxr[0]);
+                       return(1);
+               }
+               rp = (xfs_bmbt_rec_32_t *)
+                       XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+                       block, 1, mp->m_bmap_dmxr[0]);
+               *nex += INT_GET(block->bb_numrecs, ARCH_CONVERT);
+               /*
+                * XXX - if we were going to fix up the btree record,
+                * we'd do it right here.  For now, if there's a problem,
+                * we'll bail out and presumably clear the inode.
+                */
+               if (check_dups == 0)  {
+                       err = process_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                                       type, ino, tot, blkmapp,
+                                       &first_key, &last_key,
+                                       whichfork);
+                       if (err)
+                               return(1);
+                       /*
+                        * check that key ordering is monotonically increasing.
+                        * if the last_key value in the cursor is set to
+                        * NULLDFILOFF, then we know this is the first block
+                        * on the leaf level and we shouldn't check the
+                        * last_key value.
+                        */
+                       if (first_key <= bm_cursor->level[level].last_key &&
+                                       bm_cursor->level[level].last_key !=
+                                       NULLDFILOFF)  {
+                               do_warn(
+"out-of-order bmap key (file offset) in inode %llu, %s fork, fsbno %llu\n",
+                                       ino, forkname, bno);
+                               return(1);
+                       }
+                       /*
+                        * update cursor keys to reflect this block.
+                        * don't have to check if last_key is > first_key
+                        * since that gets checked by process_bmbt_reclist.
+                        */
+                       bm_cursor->level[level].first_key = first_key;
+                       bm_cursor->level[level].last_key = last_key;
+
+                       return(0);
+               } else
+                       return(scan_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                                               type, ino, tot, whichfork));
+       }
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[1] ||
+           isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[1])  {
+do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n",
+                       ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+                       mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]);
+               return(1);
+       }
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+               mp->m_bmap_dmxr[1]);
+       pkey = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+               mp->m_bmap_dmxr[1]);
+
+       last_key = NULLDFILOFF;
+
+       for (i = 0, err = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)  {
+               /*
+                * XXX - if we were going to fix up the interior btree nodes,
+                * we'd do it right here.  For now, if there's a problem,
+                * we'll bail out and presumably clear the inode.
+                */
+               if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT)))  {
+                       do_warn("bad bmap btree ptr 0x%llx in ino %llu\n",
+                               INT_GET(pp[i], ARCH_CONVERT), ino);
+                       return(1);
+               }
+
+               err = scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, whichfork,
+                               ino, tot, nex, blkmapp, bm_cursor, 0,
+                               check_dups);
+               if (err)
+                       return(1);
+
+               /*
+                * fix key (offset) mismatches between the first key
+                * in the child block (as recorded in the cursor) and the
+                * key in the interior node referencing the child block.
+                *
+                * fixes cases where entries have been shifted between
+                * child blocks but the parent hasn't been updated.  We
+                * don't have to worry about the key values in the cursor
+                * not being set since we only look at the key values of
+                * our child and those are guaranteed to be set by the
+                * call to scan_lbtree() above.
+                */
+               if (check_dups == 0 && INT_GET(pkey[i].br_startoff, ARCH_CONVERT) !=
+                                       bm_cursor->level[level-1].first_key)  {
+                       if (!no_modify)  {
+                               do_warn(
+               "correcting bt key (was %llu, now %llu) in inode %llu\n",
+                                       INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+                                       bm_cursor->level[level-1].first_key,
+                                       ino);
+                               do_warn("\t\t%s fork, btree block %llu\n",
+                                       forkname, bno);
+                               *dirty = 1;
+                               INT_SET(pkey[i].br_startoff, ARCH_CONVERT, bm_cursor->level[level-1].first_key);
+                       } else  {
+                               do_warn(
+"bad btree key (is %llu, should be %llu) in inode %llu\n",
+                                       INT_GET(pkey[i].br_startoff, ARCH_CONVERT),
+                                       bm_cursor->level[level-1].first_key,
+                                       ino);
+                               do_warn("\t\t%s fork, btree block %llu\n",
+                                       forkname, bno);
+                       }
+               }
+       }
+
+       /*
+        * Check that the last child block's forward sibling pointer
+        * is NULL.
+        */
+       if (check_dups == 0 && 
+               bm_cursor->level[level - 1].right_fsbno != NULLDFSBNO)  {
+               do_warn(
+       "bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n",
+                       bm_cursor->level[level - 1].right_fsbno);
+               do_warn(
+               "\tin inode %llu (%s fork) bmap btree block %llu\n",
+                       ino, forkname,
+                       bm_cursor->level[level].fsbno);
+               return(1);
+       }
+
+       /*
+        * update cursor keys to reflect this block
+        */
+       if (check_dups == 0)  {
+               bm_cursor->level[level].first_key =
+                               INT_GET(pkey[0].br_startoff, ARCH_CONVERT);
+               i = INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1;
+               bm_cursor->level[level].last_key =
+                               INT_GET(pkey[i].br_startoff, ARCH_CONVERT);
+       }
+
+       return(0);
+}
+
+/*
+ * scan one block of an ag's by-block-number free space btree.
+ *
+ *     ablock  - the btree block to look at
+ *     level   - level the block is expected to be at (0 == leaf)
+ *     bno     - ag-relative block number of ablock
+ *     agno    - ag being scanned
+ *     suspect - non-zero if the level above already looked bad
+ *     isroot  - non-zero if this block is exempt from the minimum
+ *               record count check
+ *
+ * for a leaf, every plausible free extent is added to
+ * bno_agffreeblks/bno_agflongest and each of its blocks goes from
+ * XR_E_UNKNOWN to XR_E_FREE1 in the block state map.  for an
+ * interior block, each plausible child pointer is handed to
+ * scan_sbtree() with this function as the callback.
+ */
+void
+scanfunc_bno(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot
+       )
+{
+       xfs_agblock_t           b;
+       xfs_agblock_t           start;
+       xfs_agblock_t           len;
+       xfs_agblock_t           end;
+       xfs_alloc_block_t       *block = (xfs_alloc_block_t *)ablock;
+       int                     i;
+       xfs_alloc_ptr_t         *pp;
+       xfs_alloc_rec_t         *rp;
+       int                     hdr_errors = 0;
+       int                     numrecs;
+       int                     state;
+
+       /*
+        * a bad header is tolerated once; if the level above was
+        * already suspect we give up on this subtree.
+        */
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) {
+               do_warn("bad magic # %#x in btbno block %d/%d\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               do_warn("expected level %d got %d in btbno block %d/%d\n",
+                       level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+
+       /*
+        * check for btree blocks multiply claimed
+        */
+       state = get_agbno_state(mp, agno, bno);
+
+       switch (state)  {
+       case XR_E_UNKNOWN:
+               set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+               break;
+       default:
+               set_agbno_state(mp, agno, bno, XR_E_MULT);
+               do_warn(
+"bno freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+                               state, agno, bno, suspect);
+               return;
+       }
+
+       if (level == 0) {
+               /*
+                * clamp the record count to what a block can legally
+                * hold so a trashed header can't run us off the block.
+                */
+               numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0])  {
+                       numrecs = mp->m_alloc_mxr[0];
+                       hdr_errors++;
+               }
+               if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0])  {
+                       numrecs = mp->m_alloc_mnr[0];
+                       hdr_errors++;
+               }
+
+               if (hdr_errors)
+                       suspect++;
+
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+                       1, mp->m_alloc_mxr[0]);
+               for (i = 0; i < numrecs; i++) {
+                       start = INT_GET(rp[i].ar_startblock, ARCH_CONVERT);
+                       len = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+
+                       /*
+                        * silently skip records that can't describe
+                        * a real free extent
+                        */
+                       if (len == 0 || start == 0 ||
+                           !verify_agbno(mp, agno, start) ||
+                           len > MAXEXTLEN)
+                               continue;
+
+                       /*
+                        * the last block of the extent has to be a
+                        * valid ag block too, not just the first one,
+                        * otherwise the loop below would get and set
+                        * state for block numbers verify_agbno() rejects.
+                        */
+                       end = start + len;
+                       if (!verify_agbno(mp, agno, end - 1))
+                               continue;
+
+                       bno_agffreeblks += len;
+                       if (len > bno_agflongest)
+                               bno_agflongest = len;
+                       for (b = start; b < end; b++)  {
+                               if (get_agbno_state(mp, agno, b)
+                                                       == XR_E_UNKNOWN)
+                                       set_agbno_state(mp, agno, b,
+                                                       XR_E_FREE1);
+                               else  {
+do_warn("block (%d,%d) multiply claimed by bno space tree, state - %d\n",
+                                       agno, b, get_agbno_state(mp, agno, b));
+                               }
+                       }
+               }
+               return;
+       }
+
+       /*
+        * interior record
+        */
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+               mp->m_alloc_mxr[1]);
+
+       numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1])  {
+               numrecs = mp->m_alloc_mxr[1];
+               hdr_errors++;
+       }
+       if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1])  {
+               numrecs = mp->m_alloc_mnr[1];
+               hdr_errors++;
+       }
+
+       /*
+        * don't pass bogus tree flag down further if this block
+        * looked ok.  bail out if two levels in a row look bad.
+        */
+
+       if (suspect && !hdr_errors)
+               suspect = 0;
+
+       if (hdr_errors)  {
+               if (suspect)
+                       return;
+               else suspect++;
+       }
+
+       for (i = 0; i < numrecs; i++)  {
+               /*
+                * XXX - put sibling detection right here.
+                * we know our sibling chain is good.  So as we go,
+                * we check the entry before and after each entry.
+                * If either of the entries references a different block,
+                * check the sibling pointer.  If there's a sibling
+                * pointer mismatch, try and extract as much data
+                * as possible.
+                */
+               if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT)))
+                       scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, suspect,
+                               scanfunc_bno, 0);
+       }
+}
+
+/*
+ * scan one block of an ag's by-size free space btree.
+ *
+ *     ablock  - the btree block to look at
+ *     level   - level the block is expected to be at (0 == leaf)
+ *     bno     - ag-relative block number of ablock
+ *     agno    - ag being scanned
+ *     suspect - non-zero if the level above already looked bad
+ *     isroot  - non-zero if this block is exempt from the minimum
+ *               record count check
+ *
+ * for a leaf, every plausible free extent is added to
+ * cnt_agffreeblks/cnt_agflongest and each of its blocks is moved
+ * XR_E_FREE1 -> XR_E_FREE or XR_E_UNKNOWN -> XR_E_FREE1 in the block
+ * state map.  for an interior block, each plausible child pointer is
+ * handed to scan_sbtree() with this function as the callback.
+ */
+void
+scanfunc_cnt(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot
+       )
+{
+       xfs_alloc_block_t       *block;
+       xfs_alloc_ptr_t         *pp;
+       xfs_alloc_rec_t         *rp;
+       xfs_agblock_t           b;
+       xfs_agblock_t           start;
+       xfs_agblock_t           len;
+       xfs_agblock_t           end;
+       int                     i;
+       int                     hdr_errors;
+       int                     numrecs;
+       int                     state;
+
+       block = (xfs_alloc_block_t *)ablock;
+       hdr_errors = 0;
+
+       /*
+        * a bad header is tolerated once; if the level above was
+        * already suspect we give up on this subtree.
+        */
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) {
+               do_warn("bad magic # %#x in btcnt block %d/%d\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               do_warn("expected level %d got %d in btcnt block %d/%d\n",
+                       level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+
+       /*
+        * check for btree blocks multiply claimed
+        */
+       state = get_agbno_state(mp, agno, bno);
+
+       switch (state)  {
+       case XR_E_UNKNOWN:
+               set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+               break;
+       default:
+               set_agbno_state(mp, agno, bno, XR_E_MULT);
+               do_warn(
+"bcnt freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+                               state, agno, bno, suspect);
+               return;
+       }
+
+       if (level == 0) {
+               /*
+                * clamp the record count to what a block can legally
+                * hold so a trashed header can't run us off the block.
+                */
+               numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0])  {
+                       numrecs = mp->m_alloc_mxr[0];
+                       hdr_errors++;
+               }
+               if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0])  {
+                       numrecs = mp->m_alloc_mnr[0];
+                       hdr_errors++;
+               }
+
+               if (hdr_errors)
+                       suspect++;
+
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+                       1, mp->m_alloc_mxr[0]);
+               for (i = 0; i < numrecs; i++) {
+                       start = INT_GET(rp[i].ar_startblock, ARCH_CONVERT);
+                       len = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+
+                       /*
+                        * silently skip records that can't describe
+                        * a real free extent
+                        */
+                       if (len == 0 || start == 0 ||
+                           !verify_agbno(mp, agno, start) ||
+                           len > MAXEXTLEN)
+                               continue;
+
+                       /*
+                        * the last block of the extent has to be a
+                        * valid ag block too, not just the first one,
+                        * otherwise the loop below would get and set
+                        * state for block numbers verify_agbno() rejects.
+                        */
+                       end = start + len;
+                       if (!verify_agbno(mp, agno, end - 1))
+                               continue;
+
+                       cnt_agffreeblks += len;
+                       if (len > cnt_agflongest)
+                               cnt_agflongest = len;
+                       for (b = start; b < end; b++)  {
+                               state = get_agbno_state(mp, agno, b);
+                               /*
+                                * no warning messages -- we'll catch
+                                * FREE1 blocks later
+                                */
+                               switch (state)  {
+                               case XR_E_FREE1:
+                                       set_agbno_state(mp, agno, b, XR_E_FREE);
+                                       break;
+                               case XR_E_UNKNOWN:
+                                       set_agbno_state(mp, agno, b,
+                                                       XR_E_FREE1);
+                                       break;
+                               default:
+                                       do_warn(
+                               "block (%d,%d) already used, state %d\n",
+                                               agno, b, state);
+                                       break;
+                               }
+                       }
+               }
+               return;
+       }
+
+       /*
+        * interior record
+        */
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+               mp->m_alloc_mxr[1]);
+
+       numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1])  {
+               numrecs = mp->m_alloc_mxr[1];
+               hdr_errors++;
+       }
+       if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1])  {
+               numrecs = mp->m_alloc_mnr[1];
+               hdr_errors++;
+       }
+
+       /*
+        * don't pass bogus tree flag down further if this block
+        * looked ok.  bail out if two levels in a row look bad.
+        */
+
+       if (suspect && !hdr_errors)
+               suspect = 0;
+
+       if (hdr_errors)  {
+               if (suspect)
+                       return;
+               else suspect++;
+       }
+
+       for (i = 0; i < numrecs; i++)
+               if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT)))
+                       scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno,
+                               suspect, scanfunc_cnt, 0);
+}
+
+/*
+ * this one walks the inode btrees sucking the info there into
+ * the incore avl tree.  We try and rescue corrupted btree records
+ * to minimize our chances of losing inodes.  Inode info from potentially
+ * corrupt sources could be bogus so rather than put the info straight
+ * into the tree, instead we put it on a list and try and verify the
+ * info in the next phase by examining what's on disk.  At that point,
+ * we'll be able to figure out what's what and stick the corrected info
+ * into the tree.  We do bail out at some point and give up on a subtree
+ * so as to avoid walking randomly all over the ag.
+ *
+ * Note that it's also ok if the free/inuse info is wrong, we can correct
+ * that when we examine the on-disk inode.  The important thing is to
+ * get the start and alignment of the inode chunks right.  Those chunks
+ * that we aren't sure about go into the uncertain list.
+ *
+ * parameters:
+ *     ablock  - the inode btree block to look at
+ *     level   - level the block is expected to be at (0 == leaf)
+ *     bno     - ag-relative block number of ablock
+ *     agno    - ag being scanned
+ *     suspect - non-zero if the level above already looked bad; it is
+ *               also bumped locally for every problem found in a leaf
+ *     isroot  - non-zero if this block is exempt from the minimum
+ *               record count check
+ *
+ * side effects visible here: updates the block state map, the
+ * agicount/agifreecount totals, the incore inode records (or the
+ * uncertain inode list) and sets bad_ino_btree when anything in the
+ * tree looked wrong.
+ */
+void
+scanfunc_ino(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot
+       )
+{
+       xfs_ino_t               lino;
+       xfs_inobt_block_t       *block;
+       int                     i;
+       xfs_agino_t             ino;
+       xfs_agblock_t           agbno;
+       int                     j;
+       int                     nfree;
+       int                     off;
+       int                     numrecs;
+       int                     state;
+       xfs_inobt_ptr_t         *pp;
+       xfs_inobt_rec_t         *rp;
+       ino_tree_node_t         *ino_rec, *first_rec, *last_rec;
+       int                     hdr_errors;
+
+       block = (xfs_inobt_block_t *)ablock;
+       hdr_errors = 0;
+
+       if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) {
+               do_warn("bad magic # %#x in inobt block %d/%d\n",
+                       INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               bad_ino_btree = 1;
+               if (suspect)
+                       return;
+       }
+       if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+               do_warn("expected level %d got %d in inobt block %d/%d\n",
+                               level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno);
+               hdr_errors++;
+               bad_ino_btree = 1;
+               if (suspect)
+                       return;
+       }
+
+       /*
+        * check for btree blocks multiply claimed, any unknown/free state
+        * is ok in the bitmap block.  note that a block already claimed
+        * by something else is marked XR_E_MULT and warned about, but
+        * the scan of this block carries on regardless.
+        */
+       state = get_agbno_state(mp, agno, bno);
+
+       switch (state)  {
+       case XR_E_UNKNOWN:
+       case XR_E_FREE1:
+       case XR_E_FREE:
+               set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+               break;
+       default:
+               set_agbno_state(mp, agno, bno, XR_E_MULT);
+               do_warn(
+"inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n",
+                               state, agno, bno, suspect);
+       }
+
+       numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+
+       /*
+        * leaf record in btree
+        */
+       if (level == 0) {
+               /* check for trashed btree block */
+
+               if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0])  {
+                       numrecs = mp->m_inobt_mxr[0];
+                       hdr_errors++;
+               }
+               if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0])  {
+                       numrecs = mp->m_inobt_mnr[0];
+                       hdr_errors++;
+               }
+
+               if (hdr_errors)  {
+                       bad_ino_btree = 1;
+                       do_warn("dubious inode btree block header %d/%d\n",
+                               agno, bno);
+                       suspect++;
+               }
+
+               rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+                       1, mp->m_inobt_mxr[0]);
+
+               /*
+                * step through the records, each record points to
+                * a chunk of inodes.  The start of inode chunks should
+                * be block-aligned.  Each inode btree rec should point
+                * to the start of a block of inodes or the start of a group
+                * of INODES_PER_CHUNK (64) inodes.  off is the offset into
+                * the block.  skip processing of bogus records.
+                */
+               for (i = 0; i < numrecs; i++) {
+                       ino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+                       off = XFS_AGINO_TO_OFFSET(mp, ino);
+                       agbno = XFS_AGINO_TO_AGBNO(mp, ino);
+                       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+                       /*
+                        * on multi-block block chunks, all chunks start
+                        * at the beginning of the block.  with multi-chunk
+                        * blocks, all chunks must start on 64-inode boundaries
+                        * since each block can hold N complete chunks. if
+                        * fs has aligned inodes, all chunks must start
+                        * at a fs_ino_alignment*N'th agbno.  skip recs
+                        * with badly aligned starting inodes.
+                        *
+                        * (as written, a badly aligned rec is warned about
+                        * and makes the tree suspect, but there is no
+                        * continue here -- the rec still goes through the
+                        * checks below.)
+                        */
+                       if (ino == 0 ||
+                           (inodes_per_block <= XFS_INODES_PER_CHUNK &&
+                            off !=  0) ||
+                           (inodes_per_block > XFS_INODES_PER_CHUNK &&
+                            off % XFS_INODES_PER_CHUNK != 0) ||
+                           (fs_aligned_inodes &&
+                            agbno % fs_ino_alignment != 0))  {
+                               do_warn(
+                       "badly aligned inode rec (starting inode = %llu)\n",
+                                       lino);
+                               suspect++;
+                       }
+
+                       /*
+                        * verify numeric validity of inode chunk first
+                        * before inserting into a tree.  don't have to
+                        * worry about the overflow case because the
+                        * starting ino number of a chunk can only get
+                        * within 255 inodes of max (NULLAGINO).  if it
+                        * gets closer, the agino number will be illegal
+                        * as the agbno will be too large.
+                        *
+                        * a non-zero return from verify_aginum() is
+                        * treated as "bad inode number" here.
+                        */
+                       if (verify_aginum(mp, agno, ino))  {
+                               do_warn(
+"bad starting inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n",
+                                       lino, agno, ino);
+                               suspect++;
+                               continue;
+                       }
+
+                       if (verify_aginum(mp, agno,
+                                       ino + XFS_INODES_PER_CHUNK - 1))  {
+                               do_warn(
+"bad ending inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n",
+                                       lino + XFS_INODES_PER_CHUNK - 1,
+                                       agno, ino + XFS_INODES_PER_CHUNK - 1);
+                               suspect++;
+                               continue;
+                       }
+
+                       /*
+                        * set state of each block containing inodes.
+                        * only done for chunks that start at offset 0 of
+                        * a block and only while nothing so far has made
+                        * the tree suspect.
+                        */
+                       if (off == 0 && !suspect)  {
+                               for (j = 0;
+                                    j < XFS_INODES_PER_CHUNK;
+                                    j += mp->m_sb.sb_inopblock)  {
+                                       agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
+                                       state = get_agbno_state(mp,
+                                                       agno, agbno);
+
+                                       if (state == XR_E_UNKNOWN)  {
+                                               set_agbno_state(mp, agno,
+                                                       agbno, XR_E_INO);
+                                       } else if (state == XR_E_INUSE_FS &&
+                                               agno == 0 &&
+                                               ino + j >= first_prealloc_ino &&
+                                               ino + j < last_prealloc_ino)  {
+                                               set_agbno_state(mp, agno,
+                                                       agbno, XR_E_INO);
+                                       } else  {
+                                               do_warn(
+"inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n",
+                                                       agno, bno,
+                                                       mp->m_sb.sb_inopblock);
+                                               suspect++;
+                                               /*
+                                                * XXX - maybe should mark
+                                                * block a duplicate
+                                                *
+                                                * NOTE: the continue below
+                                                * only moves on to the next
+                                                * block of this chunk; the
+                                                * rec itself is still
+                                                * processed further down,
+                                                * now as suspect.
+                                                */
+                                               continue;
+                                       }
+                               }
+                       }
+                       /*
+                        * ensure only one avl entry per chunk
+                        */
+                       find_inode_rec_range(agno, ino,
+                                       ino + XFS_INODES_PER_CHUNK,
+                                       &first_rec,
+                                       &last_rec);
+                       if (first_rec != NULL)  {
+                               /*
+                                * this chunk overlaps with one (or more)
+                                * already in the tree
+                                */
+                               do_warn(
+"inode rec for ino %llu (%d/%d) overlaps existing rec (start %d/%d)\n",
+                                       lino, agno, ino,
+                                       agno, first_rec->ino_startnum);
+                               suspect++;
+
+                               /*
+                                * if the 2 chunks start at the same place,
+                                * then we don't have to put this one
+                                * in the uncertain list.  go to the next one.
+                                */
+                               if (first_rec->ino_startnum == ino)
+                                       continue;
+                       }
+
+                       agicount += XFS_INODES_PER_CHUNK;
+                       agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+                       nfree = 0;
+
+                       /*
+                        * now mark all the inodes as existing and free or used.
+                        * if the tree is suspect, put them into the uncertain
+                        * inode tree.  nfree counts the inodes the rec's free
+                        * mask says are free so it can be compared against
+                        * ir_freecount afterwards.
+                        */
+                       if (!suspect)  {
+                               if (XFS_INOBT_IS_FREE(&rp[i], 0, ARCH_CONVERT)) {
+                                       nfree++;
+                                       ino_rec = set_inode_free_alloc(agno,
+                                                                       ino);
+                               } else  {
+                                       ino_rec = set_inode_used_alloc(agno,
+                                                                       ino);
+                               }
+                               for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
+                                       if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) {
+                                               nfree++;
+                                               set_inode_free(ino_rec, j);
+                                       } else  {
+                                               set_inode_used(ino_rec, j);
+                                       }
+                               }
+                       } else  {
+                               for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                                       if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) {
+                                               nfree++;
+                                               add_aginode_uncertain(agno,
+                                                               ino + j, 1);
+                                       } else  {
+                                               add_aginode_uncertain(agno,
+                                                               ino + j, 0);
+                                       }
+                               }
+                       }
+
+                       if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) {
+                               do_warn( "ir_freecount/free mismatch, inode chunk \
+%d/%d, freecount %d nfree %d\n",
+                                       agno, ino, INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree);
+                       }
+               }
+
+               if (suspect)
+                       bad_ino_btree = 1;
+
+               return;
+       }
+
+       /*
+        * interior record, continue on.  numrecs was loaded from the
+        * block header above and is clamped here to the legal range
+        * for an interior block.
+        */
+       if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1])  {
+               numrecs = mp->m_inobt_mxr[1];
+               hdr_errors++;
+       }
+       if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1])  {
+               numrecs = mp->m_inobt_mnr[1];
+               hdr_errors++;
+       }
+
+       pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+               mp->m_inobt_mxr[1]);
+
+       /*
+        * don't pass bogus tree flag down further if this block
+        * looked ok.  bail out if two levels in a row look bad.
+        */
+
+       if (suspect && !hdr_errors)
+               suspect = 0;
+
+       if (hdr_errors)  {
+               bad_ino_btree = 1;
+               if (suspect)
+                       return;
+               else suspect++;
+       }
+
+       for (i = 0; i < numrecs; i++)  {
+               if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT)))
+                       scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, suspect,
+                                       scanfunc_ino, 0);
+       }
+}
+
+/*
+ * Walk the free list of one allocation group and record its blocks
+ * in the incore block map.
+ *
+ * agf - AGF header of the allocation group, still in on-disk format
+ *       (every field is read through INT_GET/ARCH_CONVERT).
+ *
+ * The AGFL block is marked XR_E_FS_MAP unless it is the same block as
+ * the sb, agf or agi.  Each free list entry accepted by verify_agbno()
+ * is marked XR_E_FREE; rejected entries and a mismatch between the
+ * number of entries walked and agf_flcount are reported via do_warn().
+ * Nothing is walked when agf_flcount is zero or when the first/last
+ * indices do not fit inside the AGFL array.
+ */
+void
+scan_freelist(
+       xfs_agf_t       *agf)
+{
+       xfs_agfl_t      *agfl;
+       xfs_buf_t       *agflbuf;
+       xfs_agblock_t   bno;
+       int             count;
+       int             i;
+
+       if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+           XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+           XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
+               set_agbno_state(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                       XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+       if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+               return;
+
+       /*
+        * flfirst and fllast come straight from disk and are used as
+        * indices into agfl_bno[] below.  An index outside the array
+        * would make the walk read past the AGFL buffer (bad flfirst)
+        * or never reach its termination test (bad fllast), so refuse
+        * to walk a list described by out-of-range indices.
+        */
+       if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE ||
+           INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE)  {
+               do_warn("agf %d freelist blocks bad, skipping freelist scan\n",
+                       INT_GET(agf->agf_seqno, ARCH_CONVERT));
+               return;
+       }
+
+       agflbuf = libxfs_readbuf(mp->m_dev,
+                       XFS_AG_DADDR(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               XFS_AGFL_DADDR), 1, 0);
+       if (!agflbuf)  {
+               do_abort("can't read agfl block for ag %d\n",
+                       INT_GET(agf->agf_seqno, ARCH_CONVERT));
+               return;
+       }
+       agfl = XFS_BUF_TO_AGFL(agflbuf);
+       i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+       count = 0;
+       /*
+        * the free list is circular: start at flfirst, wrap from the
+        * end of the array back to slot 0, stop after visiting fllast.
+        */
+       for (;;) {
+               bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+               if (verify_agbno(mp, INT_GET(agf->agf_seqno,ARCH_CONVERT), bno))
+                       set_agbno_state(mp,
+                               INT_GET(agf->agf_seqno, ARCH_CONVERT),
+                               bno, XR_E_FREE);
+               else
+                       do_warn("bad agbno %u in agfl, agno %d\n",
+                               bno, INT_GET(agf->agf_seqno, ARCH_CONVERT));
+               count++;
+               if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+                       break;
+               if (++i == XFS_AGFL_SIZE)
+                       i = 0;
+       }
+       if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT)) {
+               do_warn("freeblk count %d != flcount %d in ag %d\n", count,
+                       INT_GET(agf->agf_flcount, ARCH_CONVERT),
+                       INT_GET(agf->agf_seqno, ARCH_CONVERT));
+       }
+       libxfs_putbuf(agflbuf);
+}
+
+/*
+ * Check the headers (sb, agf, agi) of one allocation group, then scan
+ * its free list and its bno, cnt and inode btrees.
+ *
+ * agno - allocation group number
+ *
+ * Headers flagged by verify_set_agheader() are written back at the end
+ * unless no_modify is set; in no_modify mode an AG with a bad header is
+ * reported and skipped without scanning.
+ */
+void
+scan_ag(
+       xfs_agnumber_t  agno)
+{
+       xfs_buf_t       *sbbuf;
+       xfs_buf_t       *agfbuf;
+       xfs_buf_t       *agibuf;
+       xfs_sb_t        *sb;
+       xfs_agf_t       *agf;
+       xfs_agi_t       *agi;
+       xfs_agblock_t   root;
+       int             hdr_status;
+       int             sb_dirty = 0;
+       int             agf_dirty = 0;
+       int             agi_dirty = 0;
+
+       /* every AG starts with zeroed counters */
+       cnt_agffreeblks = 0;
+       cnt_agflongest = 0;
+       bno_agffreeblks = 0;
+       bno_agflongest = 0;
+       agicount = 0;
+       agifreecount = 0;
+
+       /* superblock: read the sector and keep a translated copy in sb */
+       sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
+                               1, 0);
+       if (!sbbuf)  {
+               do_error("can't get root superblock for ag %d\n", agno);
+               return;
+       }
+
+       sb = calloc(BBSIZE, 1);
+       if (!sb)  {
+               do_error("can't allocate memory for superblock\n");
+               libxfs_putbuf(sbbuf);
+               return;
+       }
+       libxfs_xlate_sb(XFS_BUF_TO_SBP(sbbuf), sb, 1, ARCH_CONVERT,
+                       XFS_SB_ALL_BITS);
+
+       /* agf */
+       agfbuf = libxfs_readbuf(mp->m_dev,
+                       XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1, 0);
+       if (!agfbuf)  {
+               do_error("can't read agf block for ag %d\n", agno);
+               libxfs_putbuf(sbbuf);
+               free(sb);
+               return;
+       }
+       agf = XFS_BUF_TO_AGF(agfbuf);
+
+       /* agi */
+       agibuf = libxfs_readbuf(mp->m_dev,
+                       XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1, 0);
+       if (!agibuf)  {
+               do_error("can't read agi block for ag %d\n", agno);
+               libxfs_putbuf(agfbuf);
+               libxfs_putbuf(sbbuf);
+               free(sb);
+               return;
+       }
+       agi = XFS_BUF_TO_AGI(agibuf);
+
+       /* fix up bad ag headers */
+       hdr_status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno);
+
+       if (hdr_status & XR_AG_SB_SEC)  {
+               if (!no_modify)
+                       sb_dirty = 1;
+               /*
+                * a bad sector alone must not stop further processing,
+                * so drop the bit; sb_dirty already makes sure the
+                * modified sb buffer gets written out.
+                */
+               hdr_status &= ~XR_AG_SB_SEC;
+       }
+       if (hdr_status & XR_AG_SB)  {
+               if (no_modify)
+                       do_warn("would ");
+               else
+                       sb_dirty = 1;
+
+               do_warn("reset bad sb for ag %d\n", agno);
+       }
+       if (hdr_status & XR_AG_AGF)  {
+               if (no_modify)
+                       do_warn("would ");
+               else
+                       agf_dirty = 1;
+
+               do_warn("reset bad agf for ag %d\n", agno);
+       }
+       if (hdr_status & XR_AG_AGI)  {
+               if (no_modify)
+                       do_warn("would ");
+               else
+                       agi_dirty = 1;
+
+               do_warn("reset bad agi for ag %d\n", agno);
+       }
+
+       /* headers are bad and we may not fix them: give up on this AG */
+       if (hdr_status && no_modify)  {
+               libxfs_putbuf(agibuf);
+               libxfs_putbuf(agfbuf);
+               libxfs_putbuf(sbbuf);
+               free(sb);
+
+               do_warn("bad uncorrected agheader %d, skipping ag...\n", agno);
+
+               return;
+       }
+
+       scan_freelist(agf);
+
+       /* free space btree indexed by block number */
+       root = INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT);
+       if (root == 0 || !verify_agbno(mp, agno, root))
+               do_warn("bad agbno %u for btbno root, agno %d\n",
+                       root, agno);
+       else
+               scan_sbtree(root,
+                       INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT),
+                       agno, 0, scanfunc_bno, 1);
+
+       /* free space btree indexed by extent size */
+       root = INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT);
+       if (root == 0 || !verify_agbno(mp, agno, root))
+               do_warn("bad agbno %u for btbcnt root, agno %d\n",
+                       root, agno);
+       else
+               scan_sbtree(root,
+                       INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT),
+                       agno, 0, scanfunc_cnt, 1);
+
+       /* inode btree */
+       root = INT_GET(agi->agi_root, ARCH_CONVERT);
+       if (root == 0 || !verify_agbno(mp, agno, root))
+               do_warn("bad agbno %u for inobt root, agno %d\n",
+                       root, agno);
+       else
+               scan_sbtree(root, INT_GET(agi->agi_level, ARCH_CONVERT),
+                       agno, 0, scanfunc_ino, 1);
+
+       /*
+        * release the buffers; a header marked dirty is written back,
+        * which can only happen when modifications are allowed.
+        */
+       ASSERT(!agi_dirty || !no_modify);
+
+       if (agi_dirty && !no_modify)
+               libxfs_writebuf(agibuf, 0);
+       else
+               libxfs_putbuf(agibuf);
+
+       ASSERT(!agf_dirty || !no_modify);
+
+       if (agf_dirty && !no_modify)
+               libxfs_writebuf(agfbuf, 0);
+       else
+               libxfs_putbuf(agfbuf);
+
+       ASSERT(!sb_dirty || !no_modify);
+
+       if (sb_dirty && !no_modify)  {
+               /* translate the fixed-up copy back into the buffer first */
+               libxfs_xlate_sb(XFS_BUF_PTR(sbbuf), sb, -1, ARCH_CONVERT,
+                               XFS_SB_ALL_BITS);
+               libxfs_writebuf(sbbuf, 0);
+       } else  {
+               libxfs_putbuf(sbbuf);
+       }
+       free(sb);
+}
diff --git a/repair/scan.h b/repair/scan.h
new file mode 100644 (file)
index 0000000..42e1526
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef _XR_SCAN_H
+#define _XR_SCAN_H
+
+struct blkmap;
+
+/*
+ * scan a btree made of short-form (xfs_btree_sblock_t) blocks inside
+ * one allocation group, handing blocks to func.
+ *
+ * root    - AG-relative block number of the block to start at
+ * nlevels - level count passed along with root (callers pass the
+ *           level value stored next to the root pointer)
+ * agno    - allocation group number
+ * suspect - passed through to func
+ * func    - per-block callback; scanfunc_bno, scanfunc_cnt and
+ *           scanfunc_ino below have this signature
+ * isroot  - passed through to func; callers pass 1 for a tree root
+ *           and 0 when descending to a child block
+ */
+void scan_sbtree(
+       xfs_agblock_t   root,
+       int             nlevels,
+       xfs_agnumber_t  agno,
+       int             suspect,
+       void            (*func)(xfs_btree_sblock_t      *block,
+                               int                     level,
+                               xfs_agblock_t           bno,
+                               xfs_agnumber_t          agno,
+                               int                     suspect,
+                               int                     isroot),
+       int             isroot);
+
+/*
+ * long-form (xfs_btree_lblock_t) counterpart of scan_sbtree; the block
+ * is named by a filesystem-wide block number and the callback carries
+ * the extra per-inode arguments listed below.  scanfunc_bmap has the
+ * callback signature.
+ * NOTE(review): the meaning of the int return value is not visible
+ * from this header - confirm against the definition.
+ */
+int scan_lbtree(
+       xfs_dfsbno_t    root,
+       int             nlevels,
+       int             (*func)(xfs_btree_lblock_t      *block,
+                               int                     level,
+                               int                     type,
+                               int                     whichfork,
+                               xfs_dfsbno_t            bno,
+                               xfs_ino_t               ino,
+                               xfs_drfsbno_t           *tot,
+                               __uint64_t              *nex,
+                               struct blkmap           **blkmapp,
+                               bmap_cursor_t           *bm_cursor,
+                               int                     isroot,
+                               int                     check_dups,
+                               int                     *dirty),
+       int             type,
+       int             whichfork,
+       xfs_ino_t       ino,
+       xfs_drfsbno_t   *tot,
+       __uint64_t      *nex,
+       struct blkmap   **blkmapp,
+       bmap_cursor_t   *bm_cursor,
+       int             isroot,
+       int             check_dups);
+
+/*
+ * per-block callback usable with scan_lbtree (same parameter list as
+ * its func argument)
+ */
+int scanfunc_bmap(
+       xfs_btree_lblock_t      *ablock,
+       int                     level,
+       int                     type,
+       int                     whichfork,
+       xfs_dfsbno_t            bno,
+       xfs_ino_t               ino,
+       xfs_drfsbno_t           *tot,
+       __uint64_t              *nex,
+       struct blkmap           **blkmapp,
+       bmap_cursor_t           *bm_cursor,
+       int                     isroot,
+       int                     check_dups,
+       int                     *dirty);
+
+/*
+ * per-block callbacks usable with scan_sbtree (same parameter list as
+ * its func argument): one each for the by-block-number free space
+ * btree, the by-count free space btree and the inode btree.
+ */
+void scanfunc_bno(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot);
+
+void scanfunc_cnt(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot);
+
+void
+scanfunc_ino(
+       xfs_btree_sblock_t      *ablock,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot);
+
+#endif /* _XR_SCAN_H */
diff --git a/repair/versions.c b/repair/versions.c
new file mode 100644 (file)
index 0000000..526be22
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+
+#define EXTERN
+#include "versions.h"
+#undef EXTERN
+#include "err_protos.h"
+#include "globals.h"
+
+/*
+ * Bring the incore superblock's version number and feature bits in
+ * line with the fs_* feature globals.
+ *
+ * mp - mount structure whose m_sb is updated in place
+ *
+ * Attribute, nlink and quota bits are added when the matching global
+ * is set and the bit is missing.  Without quotas the quota flags and
+ * the quota version bit are cleared; without aligned inodes the align
+ * bit is removed from a version 4 superblock.
+ */
+void
+update_sb_version(xfs_mount_t *mp)
+{
+       xfs_sb_t        *sb = &mp->m_sb;
+       __uint16_t      vn;
+       int             valid_qflags;
+       int             stray_qflags;
+
+       if (fs_attributes && !XFS_SB_VERSION_HASATTR(sb))  {
+               ASSERT(fs_attributes_allowed);
+
+               XFS_SB_VERSION_ADDATTR(sb);
+       }
+
+       if (fs_inode_nlink && !XFS_SB_VERSION_HASNLINK(sb))  {
+               ASSERT(fs_inode_nlink_allowed);
+
+               XFS_SB_VERSION_ADDNLINK(sb);
+       }
+
+       /*
+        * fix up the superblock version number and feature bits,
+        * turn off quota bits and flags if the filesystem doesn't
+        * have quotas.
+        */
+       if (!fs_quotas)  {
+               sb->sb_qflags = 0;
+
+               if (XFS_SB_VERSION_HASQUOTA(sb))  {
+                       lost_quotas = 1;
+                       vn = sb->sb_versionnum;
+                       vn &= ~XFS_SB_VERSION_QUOTABIT;
+
+                       /* no feature bits left: fall back to an old version */
+                       if (!(vn & XFS_SB_VERSION_ALLFBITS))
+                               vn = XFS_SB_VERSION_TOOLD(vn);
+
+                       ASSERT(vn != 0);
+                       sb->sb_versionnum = vn;
+               }
+       } else  {
+               if (!XFS_SB_VERSION_HASQUOTA(sb))  {
+                       ASSERT(fs_quotas_allowed);
+
+                       XFS_SB_VERSION_ADDQUOTA(sb);
+               }
+
+               /*
+                * protect against stray bits in the quota flag field
+                */
+               valid_qflags = XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|
+                               XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|
+                               XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD;
+               stray_qflags = sb->sb_qflags & ~valid_qflags;
+
+               if (stray_qflags)  {
+                       /*
+                        * update the incore superblock, if we're in
+                        * no_modify mode, it'll never get flushed out
+                        * so this is ok.
+                        */
+                       do_warn("bogus quota flags 0x%x set in superblock",
+                               stray_qflags);
+
+                       sb->sb_qflags &= valid_qflags;
+
+                       if (no_modify)
+                               do_warn(", bogus flags would be cleared\n");
+                       else
+                               do_warn(", bogus flags will be cleared\n");
+               }
+       }
+
+       if (!fs_aligned_inodes && XFS_SB_VERSION_HASALIGN(sb) &&
+           XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4)  {
+               XFS_SB_VERSION_SUBALIGN(sb);
+       }
+}
+
+/*
+ * Parse the version number and feature bits of a superblock and set
+ * the fs_* feature globals (plus have_uquotino, have_pquotino,
+ * fs_ino_alignment and fs_max_file_offset) to match.
+ *
+ * sb - superblock to examine; it is only read
+ *
+ * returns 0 if things are fine, 1 if we don't understand
+ * this superblock version.  A feature that is present but has been
+ * disallowed leaves its global at 0 and produces a warning about
+ * the downgrade.
+ */
+int
+parse_sb_version(xfs_sb_t *sb)
+{
+       int issue_warning;
+
+       /*
+        * start from a clean slate on every call so nothing detected in
+        * a previously parsed superblock leaks into this one; fs_shared
+        * is only ever set below, so it has to be cleared here too.
+        */
+       fs_attributes = 0;
+       fs_inode_nlink = 0;
+       fs_quotas = 0;
+       fs_aligned_inodes = 0;
+       fs_sb_feature_bits = 0;
+       fs_ino_alignment = 0;
+       fs_has_extflgbit = 0;
+       fs_shared = 0;
+       have_uquotino = 0;
+       have_pquotino = 0;
+       issue_warning = 0;
+
+       /*
+        * ok, check to make sure that the sb isn't newer
+        * than we are
+        */
+       if (XFS_SB_VERSION_HASEXTFLGBIT(sb))  {
+               fs_has_extflgbit = 1;
+               if (!fs_has_extflgbit_allowed)  {
+                       issue_warning = 1;
+                       do_warn(
+                          "This filesystem has uninitialized extent flags.\n");
+               }
+       }
+
+       if (XFS_SB_VERSION_HASSHARED(sb))  {
+               fs_shared = 1;
+               if (!fs_shared_allowed)  {
+                       issue_warning = 1;
+                       do_warn("This filesystem is marked shared.\n");
+               }
+       }
+
+       if (issue_warning)  {
+               do_warn(
+"This filesystem uses 6.5 feature(s) not yet supported in this release.\n\
+Please run a 6.5 version of xfs_repair.\n");
+               return(1);
+       }
+
+       if (!XFS_SB_GOOD_VERSION(sb))  {
+               do_warn(
+       "WARNING:  unknown superblock version %d\n", XFS_SB_VERSION_NUM(sb));
+               do_warn(
+       "This filesystem contains features not understood by this program.\n");
+               return(1);
+       }
+
+       if (XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4)  {
+               if (!fs_sb_feature_bits_allowed)  {
+                       do_warn(
+       "WARNING:  you have disallowed superblock feature bits disallowed\n");
+                       do_warn(
+       "\tbut this superblock has feature bits.  The superblock\n");
+
+                       if (!no_modify)  {
+                               do_warn(
+       "\twill be downgraded.  This may cause loss of filesystem meta-data\n");
+                       } else   {
+                               do_warn(
+       "\twould be downgraded.  This might cause loss of filesystem\n");
+                               do_warn(
+       "\tmeta-data.\n");
+                       }
+               } else   {
+                       fs_sb_feature_bits = 1;
+               }
+       }
+
+       if (XFS_SB_VERSION_HASATTR(sb))  {
+               if (!fs_attributes_allowed)  {
+                       do_warn(
+       "WARNING:  you have disallowed attributes but this filesystem\n");
+                       if (!no_modify)  {
+                               do_warn(
+       "\thas attributes.  The filesystem will be downgraded and\n");
+                               do_warn(
+       "\tall attributes will be removed.\n");
+                       } else  {
+                               do_warn(
+       "\thas attributes.  The filesystem would be downgraded and\n");
+                               do_warn(
+       "\tall attributes would be removed.\n");
+                       }
+               } else   {
+                       fs_attributes = 1;
+               }
+       }
+
+       if (XFS_SB_VERSION_HASNLINK(sb))  {
+               if (!fs_inode_nlink_allowed)  {
+                       do_warn(
+       "WARNING:  you have disallowed version 2 inodes but this filesystem\n");
+                       if (!no_modify)  {
+                               do_warn(
+       "\thas version 2 inodes.  The filesystem will be downgraded and\n");
+                               do_warn(
+       "\tall version 2 inodes will be converted to version 1 inodes.\n");
+                               do_warn(
+       "\tThis may cause some hard links to files to be destroyed\n");
+                       } else  {
+                               do_warn(
+       "\thas version 2 inodes.  The filesystem would be downgraded and\n");
+                               do_warn(
+       "\tall version 2 inodes would be converted to version 1 inodes.\n");
+                               do_warn(
+       "\tThis might cause some hard links to files to be destroyed\n");
+                       }
+               } else   {
+                       fs_inode_nlink = 1;
+               }
+       }
+
+       if (XFS_SB_VERSION_HASQUOTA(sb))  {
+               if (!fs_quotas_allowed)  {
+                       do_warn(
+       "WARNING:  you have disallowed quotas but this filesystem\n");
+                       if (!no_modify)  {
+                               do_warn(
+       "\thas quotas.  The filesystem will be downgraded and\n");
+                               do_warn(
+       "\tall quota information will be removed.\n");
+                       } else  {
+                               do_warn(
+       "\thas quotas.  The filesystem would be downgraded and\n");
+                               do_warn(
+       "\tall quota information would be removed.\n");
+                       }
+               } else   {
+                       fs_quotas = 1;
+
+                       /* a quota inode counts only if it is neither 0 nor null */
+                       if (sb->sb_uquotino != 0 &&
+                                       sb->sb_uquotino != NULLFSINO)
+                               have_uquotino = 1;
+
+                       if (sb->sb_pquotino != 0 &&
+                                       sb->sb_pquotino != NULLFSINO)
+                               have_pquotino = 1;
+               }
+       }
+
+       if (XFS_SB_VERSION_HASALIGN(sb))  {
+               if (fs_aligned_inodes_allowed)  {
+                       fs_aligned_inodes = 1;
+                       fs_ino_alignment = sb->sb_inoalignmt;
+               } else   {
+                       do_warn(
+       "WARNING:  you have disallowed aligned inodes but this filesystem\n");
+                       if (!no_modify)  {
+                               do_warn(
+       "\thas aligned inodes.  The filesystem will be downgraded.\n");
+                               do_warn(
+"\tThis will permanently degrade the performance of this filesystem.\n");
+                       } else  {
+                               do_warn(
+       "\thas aligned inodes.  The filesystem would be downgraded.\n");
+                               do_warn(
+"\tThis would permanently degrade the performance of this filesystem.\n");
+                       }
+               }
+       }
+
+       /*
+        * calculate maximum file offset for this geometry
+        */
+       fs_max_file_offset = 0x7fffffffffffffffLL >> sb->sb_blocklog;
+
+       return(0);
+}
diff --git a/repair/versions.h b/repair/versions.h
new file mode 100644 (file)
index 0000000..5f592be
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef _XR_VERSIONS_H
+#define _XR_VERSIONS_H
+
+/*
+ * EXTERN expands to "extern" unless the including file defined it
+ * first; a file that defines it as empty before including this header
+ * turns the declarations below into the definitions of the globals.
+ */
+#ifndef EXTERN
+#define EXTERN extern
+#endif /* EXTERN */
+
+/*
+ * possible XFS filesystem features
+ *
+ * attributes                                  (6.2)
+ * inode version 2 (32-bit link counts)                (6.2)
+ * quotas                                      (6.2+)
+ * aligned inodes                              (6.2+)
+ *
+ * bitmask fields happened after 6.2.
+ */
+
+/*
+ * filesystem feature global vars, set to 1 if the feature
+ * is *allowed*, 0 otherwise.  These can be set via command-line
+ * options
+ */
+
+EXTERN int             fs_attributes_allowed;
+EXTERN int             fs_inode_nlink_allowed;
+EXTERN int             fs_quotas_allowed;
+EXTERN int             fs_aligned_inodes_allowed;
+EXTERN int             fs_sb_feature_bits_allowed;
+EXTERN int             fs_has_extflgbit_allowed;
+EXTERN int             fs_shared_allowed;
+
+/*
+ * filesystem feature global vars, set to 1 if the feature
+ * is on, 0 otherwise
+ */
+
+EXTERN int             fs_attributes;
+EXTERN int             fs_inode_nlink;
+EXTERN int             fs_quotas;
+EXTERN int             fs_aligned_inodes;
+EXTERN int             fs_sb_feature_bits;
+EXTERN int             fs_has_extflgbit;
+EXTERN int             fs_shared;
+
+/*
+ * inode chunk alignment, fsblocks
+ */
+
+EXTERN xfs_extlen_t    fs_ino_alignment;
+
+/*
+ * modify superblock to reflect current state of global fs
+ * feature vars above; operates on the superblock embedded in
+ * the mount structure (mp->m_sb)
+ */
+void                   update_sb_version(xfs_mount_t *mp);
+
+/*
+ * parse current sb to set above feature vars; returns 0 if the
+ * superblock version is understood, 1 if it is not
+ */
+int                    parse_sb_version(xfs_sb_t *sb);
+
+#endif /* _XR_VERSIONS_H */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
new file mode 100644 (file)
index 0000000..9f32031
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * 
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ * 
+ * For further information regarding this notice, see: 
+ * 
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "avl64.h"
+#include "globals.h"
+#include "versions.h"
+#include "agheader.h"
+#include "protos.h"
+#include "incore.h"
+#include "err_protos.h"
+
+/*
+ * Round x down to a multiple of y.  Relies on truncating division,
+ * so it is only meaningful for integer operands; y must be non-zero.
+ * Both arguments are evaluated more than once (y twice).
+ */
+#define        rounddown(x, y) (((x)/(y))*(y))
+
+/*
+ * Entry points of the individual repair phases and of the in-core
+ * state initialisation.  They are defined outside this file and are
+ * called, in order, from main() below.
+ */
+extern void    phase1(xfs_mount_t *);
+extern void    phase2(xfs_mount_t *, libxfs_init_t *);
+extern void    phase3(xfs_mount_t *);
+extern void    phase4(xfs_mount_t *);
+extern void    phase5(xfs_mount_t *);
+extern void    phase6(xfs_mount_t *);
+extern void    phase7(xfs_mount_t *);
+extern void    incore_init(xfs_mount_t *);
+
+/* NOTE(review): presumably the largest supported sector size, in bytes */
+#define                XR_MAX_SECT_SIZE        (64 * 1024)
+
+/*
+ * option tables for getsubopt calls
+ */
+
+/*
+ * -o (user-supplied override options)
+ *
+ * Each #define is the index of the string that follows it; the value
+ * returned by getsubopt() in process_args() is compared against these
+ * indices, so a macro and its table slot must stay in step.  The
+ * table is NULL-terminated.
+ */
+
+char *o_opts[] = {
+#define ASSUME_XFS     0
+       "assume_xfs",
+#define PRE_65_BETA    1
+       "fs_is_pre_65_beta",
+       NULL
+};
+
+/*
+ * Print the command-line synopsis through do_warn() and terminate
+ * the program with exit status 1.  Never returns.
+ */
+static void
+usage(void)
+{
+       do_warn(
+       "Usage: %s [-nV] [-o subopt[=value]] [-l logdevice] devname\n",
+               progname);
+
+       exit(1);
+}
+
+/*
+ * Human-readable messages, indexed directly by error code in
+ * err_string() below.  Entry 0 is the "no error" message.
+ * NOTE(review): the order presumably mirrors the XR_* error codes
+ * declared elsewhere; adding a code requires adding a string at the
+ * same position -- confirm against that declaration.
+ */
+static char *err_message[] = {
+       "no error",
+       "bad magic number",
+       "bad blocksize field",
+       "bad blocksize log field",
+       "bad version number",
+       "filesystem mkfs-in-progress bit set",
+       "inconsistent filesystem geometry information",
+       "bad inode size or inconsistent with number of inodes/block",
+       "bad sector size",
+       "AGF geometry info conflicts with filesystem geometry",
+       "AGI geometry info conflicts with filesystem geometry",
+       "AG superblock geometry info conflicts with filesystem geometry",
+       "attempted to perform I/O beyond EOF",
+       "inconsistent filesystem geometry in realtime filesystem component",
+       "maximum indicated percentage of inodes > 100%",
+       "inconsistent inode alignment value",
+       "not enough secondary superblocks with matching geometry",
+       "bad stripe unit in superblock",
+       "bad stripe width in superblock",
+       "bad shared version number in superblock"
+};
+
+/*
+ * Return the message string for an error code.
+ *
+ * Codes outside the range [XR_OK, XR_BAD_ERR_CODE) are reported
+ * through do_abort(), which terminates the program.
+ */
+char *
+err_string(int err_code)
+{
+       if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE)  {
+               do_abort("bad error code - %d\n", err_code);
+       }
+
+       return err_message[err_code];
+}
+
+/*
+ * Complain that suboption tbl[idx] of option -<opt> was given a
+ * value although it takes none, then bail out through usage().
+ */
+static void
+noval(char opt, char *tbl[], int idx)
+{
+       char    *subopt_name = tbl[idx];
+
+       do_warn("-%c %s option cannot have a value\n", opt, subopt_name);
+       usage();
+}
+
+/*
+ * Complain that option -<opt> (or, when tbl is non-NULL, its
+ * suboption tbl[idx]) was specified more than once, then bail out
+ * through usage().  The message is emitted in pieces.
+ */
+static void
+respec(char opt, char *tbl[], int idx)
+{
+       do_warn("-%c ", opt);
+
+       if (tbl != NULL)  {
+               do_warn("%s ", tbl[idx]);
+       }
+
+       do_warn("option respecified\n");
+       usage();
+}
+
+/*
+ * Complain about an unrecognised argument s to option -<opt>, then
+ * bail out through usage().
+ */
+static void
+unknown(char opt, char *s)
+{
+       do_warn("unknown option -%c %s\n", opt, s);
+
+       usage();
+}
+
+/*
+ * Parse the command line.  Sets only the global argument flags and
+ * variables: every option global is first reset to its default and
+ * then overridden by what is found in argv.
+ *
+ * Options:
+ *     -D              dump core (abort()) on a fatal error
+ *     -o subopt       override suboptions, see o_opts[]
+ *     -l logdevice    name of the log device
+ *     -f              sets isa_file
+ *     -n              no-modify mode
+ *     -v              verbose
+ *     -V              print the version and exit with status 0
+ *
+ * Exactly one non-option argument must remain; it is stored in
+ * fs_name.  Any usage problem ends the program through usage().
+ */
+void
+process_args(int argc, char **argv)
+{
+       char *p;
+       int c;
+
+       log_spec = 0;
+       fs_is_dirty = 0;
+       verbose = 0;
+       no_modify = 0;
+       isa_file = 0;
+       dumpcore = 0;
+       full_backptrs = 0;
+       delete_attr_ok = 1;
+       force_geo = 0;
+       assume_xfs = 0;
+       clear_sunit = 0;
+       sb_inoalignmt = 0;
+       sb_unit = 0;
+       sb_width = 0;
+       fs_attributes_allowed = 1;
+       fs_inode_nlink_allowed = 1;
+       fs_quotas_allowed = 1;
+       fs_aligned_inodes_allowed = 1;
+       fs_sb_feature_bits_allowed = 1;
+       fs_has_extflgbit_allowed = 1;
+       pre_65_beta = 0;
+       fs_shared_allowed = 1;
+
+       /*
+        * XXX have to add suboption processing here
+        * attributes, quotas, nlinks, aligned_inos, sb_fbits
+        */
+       /* getopt() is specified to return -1, not EOF, when done */
+       while ((c = getopt(argc, argv, "o:fnDvVl:")) != -1)  {
+               switch (c) {
+               case 'D':
+                       dumpcore = 1;
+                       break;
+               case 'o':
+                       p = optarg;
+                       while (*p != '\0')  {
+                               char *val;
+
+                               switch (getsubopt(&p, (constpp)o_opts, &val))  {
+                               case ASSUME_XFS:
+                                       if (val)
+                                               noval('o', o_opts, ASSUME_XFS);
+                                       if (assume_xfs)
+                                               respec('o', o_opts, ASSUME_XFS);
+                                       assume_xfs = 1;
+                                       break;
+                               case PRE_65_BETA:
+                                       if (val)
+                                               noval('o', o_opts, PRE_65_BETA);
+                                       if (pre_65_beta)
+                                               respec('o', o_opts,
+                                                       PRE_65_BETA);
+                                       pre_65_beta = 1;
+                                       break;
+                               default:
+                                       unknown('o', val);
+                                       break;
+                               }
+                       }
+                       break;
+               case 'l':
+                       log_name = optarg;
+                       log_spec = 1;
+                       break;
+               case 'f':
+                       isa_file = 1;
+                       break;
+               case 'n':
+                       no_modify = 1;
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case 'V':
+                       /*
+                        * A version query is a complete request on its
+                        * own: do not fall through to the device-name
+                        * check (or to an actual repair) afterwards.
+                        */
+                       printf("%s version %s\n", progname, VERSION);
+                       exit(0);
+               case '?':
+               default:
+                       usage();
+                       break;
+               }
+       }
+
+       if (argc - optind != 1)
+               usage();
+
+       if ((fs_name = argv[optind]) == NULL)
+               usage();
+}
+
+/*
+ * Common back end of the do_*() reporting functions: format msg with
+ * args onto stderr.  When do_abort is non-zero the program ends here,
+ * through abort() if the dumpcore flag is set and exit(1) otherwise.
+ */
+void
+do_msg(int do_abort, char const *msg, va_list args)
+{
+       vfprintf(stderr, msg, args);
+
+       if (!do_abort)
+               return;
+
+       if (dumpcore)
+               abort();
+
+       exit(1);
+}
+
+/*
+ * Report a fatal error: print "fatal error -- " followed by the
+ * printf-style message on stderr and terminate the program (see
+ * do_msg()).  Does not return.
+ */
+void
+do_error(char const *msg, ...)
+{
+       va_list args;
+
+       fprintf(stderr, "\nfatal error -- ");
+
+       va_start(args, msg);
+       do_msg(1, msg, args);
+       /*
+        * Not reached, do_msg() exits when its first argument is set.
+        * Kept so every va_start() has its va_end(), as in do_warn().
+        */
+       va_end(args);
+}
+
+/*
+ * like do_error, only the error is internal: the printf-style
+ * message is printed on stderr without the "fatal error -- " prefix
+ * and the program is terminated (see do_msg()).  Does not return.
+ */
+void
+do_abort(char const *msg, ...)
+{
+       va_list args;
+
+       va_start(args, msg);
+       do_msg(1, msg, args);
+       /*
+        * Not reached, do_msg() exits when its first argument is set.
+        * Kept so every va_start() has its va_end(), as in do_warn().
+        */
+       va_end(args);
+}
+
+/*
+ * Print a printf-style warning on stderr and return.  Every call
+ * also sets fs_is_dirty, which main() uses as its exit status in
+ * no-modify mode.
+ */
+void
+do_warn(char const *msg, ...)
+{
+       va_list ap;
+
+       fs_is_dirty = 1;
+
+       va_start(ap, msg);
+       do_msg(0, msg, ap);
+       va_end(ap);
+}
+
+/*
+ * Print a printf-style informational message on stderr and return.
+ * Nothing is added to the message and, unlike do_warn(), fs_is_dirty
+ * is left alone.
+ */
+
+void
+do_log(char const *msg, ...)
+{
+       va_list ap;
+
+       va_start(ap, msg);
+       do_msg(0, msg, ap);
+       va_end(ap);
+}
+
+/*
+ * Work out where mkfs would have placed the fixed structures of AG 0
+ * and check the superblock against that.
+ *
+ * Sets the globals bnobt_root, bcntbt_root, inobt_root,
+ * first_prealloc_ino and last_prealloc_ino.  The root, realtime
+ * bitmap and realtime summary inode numbers in mp->m_sb are expected
+ * to be first_prealloc_ino + 0, + 1 and + 2; each one that differs is
+ * reported and overwritten in the in-core superblock.
+ */
+void
+calc_mkfs(xfs_mount_t *mp)
+{
+       xfs_agblock_t   fino_bno;
+       int             do_inoalign;
+
+       do_inoalign = mp->m_sinoalign;
+
+       /*
+        * pre-calculate geometry of ag 0.  We know what it looks
+        * like because we know what mkfs does -- 3 btree roots,
+        * and some number of blocks to prefill the agfl.
+        */
+       bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+       bcntbt_root = bnobt_root + 1;
+       inobt_root = bnobt_root + 2;
+       fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1;
+
+       /*
+        * ditto the location of the first inode chunks in the fs ('/')
+        */
+       if (XFS_SB_VERSION_HASDALIGN(&mp->m_sb) && do_inoalign)  {
+               first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno,
+                                       mp->m_sb.sb_unit), 0);
+       } else if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+                                       mp->m_sb.sb_inoalignmt > 1)  {
+               first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp,
+                                       roundup(fino_bno,
+                                               mp->m_sb.sb_inoalignmt),
+                                       0);
+       } else  {
+               first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0);
+       }
+
+       ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+       if (XFS_IALLOC_BLOCKS(mp) > 1)
+               last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK;
+       else
+               last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0);
+
+       /*
+        * now the first 3 inodes in the system
+        *
+        * The inode numbers are project typedefs whose width is not
+        * guaranteed to be that of unsigned long long, so every value
+        * handed to a %llu conversion is cast explicitly.
+        */
+       if (mp->m_sb.sb_rootino != first_prealloc_ino)  {
+               do_warn(
+       "sb root inode value %llu inconsistent with calculated value %llu\n",
+               (unsigned long long)mp->m_sb.sb_rootino,
+               (unsigned long long)first_prealloc_ino);
+
+               if (!no_modify)
+                       do_warn(
+                       "resetting superblock root inode pointer to %llu\n",
+                               (unsigned long long)first_prealloc_ino);
+               else
+                       do_warn(
+                       "would reset superblock root inode pointer to %llu\n",
+                               (unsigned long long)first_prealloc_ino);
+
+               /*
+                * just set the value -- safe since the superblock
+                * doesn't get flushed out if no_modify is set
+                */
+               mp->m_sb.sb_rootino = first_prealloc_ino;
+       }
+
+       if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1)  {
+               do_warn(
+"sb realtime bitmap inode %llu inconsistent with calculated value %llu\n",
+               (unsigned long long)mp->m_sb.sb_rbmino,
+               (unsigned long long)(first_prealloc_ino + 1));
+
+               if (!no_modify)
+                       do_warn(
+               "resetting superblock realtime bitmap ino pointer to %llu\n",
+                               (unsigned long long)(first_prealloc_ino + 1));
+               else
+                       do_warn(
+               "would reset superblock realtime bitmap ino pointer to %llu\n",
+                               (unsigned long long)(first_prealloc_ino + 1));
+
+               /*
+                * just set the value -- safe since the superblock
+                * doesn't get flushed out if no_modify is set
+                */
+               mp->m_sb.sb_rbmino = first_prealloc_ino + 1;
+       }
+
+       if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2)  {
+               do_warn(
+"sb realtime summary inode %llu inconsistent with calculated value %llu\n",
+               (unsigned long long)mp->m_sb.sb_rsumino,
+               (unsigned long long)(first_prealloc_ino + 2));
+
+               if (!no_modify)
+                       do_warn(
+               "resetting superblock realtime summary ino pointer to %llu\n",
+                               (unsigned long long)(first_prealloc_ino + 2));
+               else
+                       do_warn(
+               "would reset superblock realtime summary ino pointer to %llu\n",
+                               (unsigned long long)(first_prealloc_ino + 2));
+
+               /*
+                * just set the value -- safe since the superblock
+                * doesn't get flushed out if no_modify is set
+                */
+               mp->m_sb.sb_rsumino = first_prealloc_ino + 2;
+       }
+}
+
+/*
+ * Program entry point.
+ *
+ * Parses the arguments, runs phase 1 against a scratch mount
+ * structure, then re-reads the primary superblock, mounts the
+ * filesystem through libxfs and runs phases 2 to 7.  Phase 5 is
+ * skipped in no-modify mode; phases 6 and 7 are skipped when
+ * bad_ino_btree is set.  Unless no_modify is set, the superblock is
+ * finally written back with the quota-checked flags (and, when
+ * clear_sunit is set, the stripe unit/width fields) cleared.
+ *
+ * Returns 0 on success.  Returns 1 if parse_sb_version() reports
+ * unsupported features or if, in no-modify mode, fs_is_dirty was set
+ * (which any do_warn() call does).  Fatal errors exit(1) directly.
+ */
+int
+main(int argc, char **argv)
+{
+       libxfs_init_t   args;
+       xfs_mount_t     *temp_mp;
+       xfs_mount_t     *mp;
+       xfs_sb_t        *sb;
+       xfs_buf_t       *sbp;
+       xfs_mount_t     xfs_m;
+
+       progname = basename(argv[0]);
+
+       temp_mp = &xfs_m;
+       setbuf(stdout, NULL);
+
+       process_args(argc, argv);
+       xfs_init(&args);
+
+       /* do phase1 to make sure we have a superblock */
+       phase1(temp_mp);
+
+       if (no_modify && primary_sb_modified)  {
+               do_warn("primary superblock would have been modified.\n");
+               do_warn("cannot proceed further in no_modify mode.\n");
+               do_warn("exiting now.\n");
+               exit(1);
+       }
+
+       /* prepare the mount structure */
+       sbp = libxfs_readbuf(args.ddev, XFS_SB_DADDR, 1, 0);
+       /* the buffer is dereferenced just below, so fail cleanly here */
+       if (!sbp)
+               do_error("couldn't read primary superblock\n");
+
+       memset(&xfs_m, 0, sizeof(xfs_mount_t));
+       sb = &xfs_m.m_sb;
+       libxfs_xlate_sb(XFS_BUF_PTR(sbp), sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+
+       mp = libxfs_mount(&xfs_m, sb, args.ddev, args.logdev, args.rtdev, 0);
+
+       if (!mp)  {
+               fprintf(stderr, "%s: cannot repair this filesystem.  Sorry.\n",
+                       progname);
+               exit(1);
+       }
+       libxfs_putbuf(sbp);
+
+       /*
+        * set XFS-independent status vars from the mount/sb structure
+        */
+       glob_agcount = mp->m_sb.sb_agcount;
+
+       chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK;
+       max_symlink_blocks = howmany(MAXPATHLEN - 1, mp->m_sb.sb_blocksize);
+       inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
+
+       /*
+        * calculate what mkfs would do to this filesystem
+        */
+       calc_mkfs(mp);
+
+       /*
+        * check sb filesystem stats and initialize in-core data structures
+        */
+       incore_init(mp);
+
+       if (parse_sb_version(&mp->m_sb))  {
+               do_warn(
+                     "Found unsupported filesystem features.  Exiting now.\n");
+               return(1);
+       }
+
+       /* make sure the per-ag freespace maps are ok so we can mount the fs */
+
+       phase2(mp, &args);
+
+       phase3(mp);
+
+       phase4(mp);
+
+       if (no_modify)
+               printf("No modify flag set, skipping phase 5\n");
+       else
+               phase5(mp);
+
+       if (!bad_ino_btree)  {
+               phase6(mp);
+
+               phase7(mp);
+       } else  {
+               do_warn(
+       "Inode allocation btrees are too corrupted, skipping phases 6 and 7\n");
+       }
+
+       /*
+        * report what happened to the quota inodes; the wording
+        * depends on whether anything was (or only would be) changed
+        */
+       if (lost_quotas && !have_uquotino && !have_pquotino)  {
+               if (!no_modify)  {
+                       do_warn(
+       "Warning:  no quota inodes were found.  Quotas disabled.\n");
+               } else  {
+                       do_warn(
+       "Warning:  no quota inodes were found.  Quotas would be disabled.\n");
+               }
+       } else if (lost_quotas)  {
+               if (!no_modify)  {
+                       do_warn(
+       "Warning:  quota inodes were cleared.  Quotas disabled.\n");
+               } else  {
+                       do_warn(
+"Warning:  quota inodes would be cleared.  Quotas would be disabled.\n");
+               }
+       } else  {
+               if (lost_uquotino)  {
+                       if (!no_modify)  {
+                               do_warn(
+               "Warning:  user quota information was cleared.\n");
+                               do_warn(
+"User quotas can not be enforced until limit information is recreated.\n");
+                       } else  {
+                               do_warn(
+               "Warning:  user quota information would be cleared.\n");
+                               do_warn(
+"User quotas could not be enforced until limit information was recreated.\n");
+                       }
+               }
+
+               if (lost_pquotino)  {
+                       if (!no_modify)  {
+                               do_warn(
+               "Warning:  project quota information was cleared.\n");
+                               do_warn(
+"Project quotas can not be enforced until limit information is recreated.\n");
+                       } else  {
+                               do_warn(
+               "Warning:  project quota information would be cleared.\n");
+                               do_warn(
+"Project quotas could not be enforced until limit information was recreated.\n");
+                       }
+               }
+       }
+
+       if (no_modify)  {
+               do_log(
+       "No modify flag set, skipping filesystem flush and exiting.\n");
+               if (fs_is_dirty)
+                       return(1);
+
+               return(0);
+       }
+
+       /*
+        * Clear the quota flags if they're on.
+        */
+       sbp = libxfs_getsb(mp, 0);
+       if (!sbp)
+               do_error("couldn't get superblock\n");
+
+       sb = XFS_BUF_TO_SBP(sbp);
+
+       if (sb->sb_qflags & (XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD))  {
+               do_warn(
+               "Note - quota info will be regenerated on next quota mount.\n");
+               sb->sb_qflags &= ~(XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD);
+       }
+
+       if (clear_sunit) {
+               /* print the old values before they are zeroed */
+               do_warn(
+"Note - stripe unit (%d) and width (%d) fields have been reset.\n"
+"Please set with mount -o sunit=<value>,swidth=<value>\n",
+                       sb->sb_unit, sb->sb_width);
+               sb->sb_unit = 0;
+               sb->sb_width = 0;
+       }
+
+       libxfs_writebuf(sbp, 0);
+
+       libxfs_umount(mp);
+       if (args.rtdev)
+               libxfs_device_close(args.rtdev);
+       if (args.logdev)
+               libxfs_device_close(args.logdev);
+       libxfs_device_close(args.ddev);
+
+       do_log("done\n");
+
+       return(0);
+}