git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
Revert "3.0.5 release" and some of its preceding commits.
author: Alex Elder <aelder@sgi.com>
Wed, 25 Nov 2009 23:44:40 +0000 (17:44 -0600)
committer: Alex Elder <aelder@sgi.com>
Wed, 25 Nov 2009 23:44:40 +0000 (17:44 -0600)
This reverts 11 commits that followed merge 15a60a5...:
    b0567f1 3.0.5 release
    24d9757 add lpath_to_handle to libhandle
    bad0fe5 repair: add missing locking in scanfunc_bmap
    2098754 repair: optimize duplicate extent tracking
    241ea1c repair: switch block usage bitmap to a btree
    af20fe6 repair: cleanup alloc/free/reset of the block...
    add8f66 repair: cleanup helpers for tracking block usage
    da9398d repair: track logical to physical block mapping...
    d081a36 repair: clean up prefetch tracing
    d93f8b2 repair: use single prefetch queue
    eb26465 repair: use a btree instead of a radix tree for...

Signed-off-by: Alex Elder <aelder@sgi.com>
29 files changed:
VERSION
doc/CHANGES
include/handle.h
libhandle/handle.c
repair/Makefile
repair/bmap.c
repair/bmap.h
repair/btree.c [deleted file]
repair/btree.h [deleted file]
repair/dino_chunks.c
repair/dinode.c
repair/dir2.c
repair/globals.h
repair/incore.c
repair/incore.h
repair/incore_ext.c
repair/init.c
repair/phase2.c
repair/phase3.c
repair/phase4.c
repair/phase5.c
repair/phase6.c
repair/prefetch.c
repair/prefetch.h
repair/radix-tree.c [new file with mode: 0644]
repair/radix-tree.h [new file with mode: 0644]
repair/rt.c
repair/scan.c
repair/xfs_repair.c

diff --git a/VERSION b/VERSION
index 9dff34aa3bc11f912fc99febaa3717f71dbfcc4f..60188182694d62dedd6004de96b7df7444e211cb 100644 (file)
--- a/VERSION
+++ b/VERSION
@@ -3,5 +3,5 @@
 #
 PKG_MAJOR=3
 PKG_MINOR=0
-PKG_REVISION=5
+PKG_REVISION=4
 PKG_BUILD=1
index 2e4d2ac6f45aa7523f850ac1ebe72943410625d5..515c04c009514d41808aaed90207b13c9a90baa4 100644 (file)
@@ -1,19 +1,3 @@
-xfsprogs-3.0.5 (23 October 2009)
-       - Use btrees in xfs_repair in a number of critical data
-         structures, in place of bitmaps and radix trees, resulting
-         in reduced memory and CPU requirements for large file
-         systems.
-       - Various other performance improvements in xfs_repair.
-       - Add a new function lpath_to_handle() to libhandle, which
-         allows symlinks to be handled more robustly.
-       - Tweak the code so a single scanfunc_allocbt() can be used
-         in place of the two nearly-identical functions used before.
-       - Add support for discarding blocks to mkfs (along with a
-         command-line option to avoid its use if desired).
-       - Allow use of libblkid from util-linux if it is available,
-         for determining device geometry.
-       - A few configuration and build improvements.
-
 xfsprogs-3.0.4 (17 September 2009)
        - Fix a memory leak in xfsprogs.
        - Increase hash chain length in xfsprogs when running out of memory.
index 3f1a137f785dc3be1ae8fd0ed783f8af96c5dec2..b211a2f4526ec173e18f27cd6acd5b143ac4b9d1 100644 (file)
@@ -27,8 +27,6 @@ struct attrlist_cursor;
 struct parent;
 
 extern int  path_to_handle (char *__path, void **__hanp, size_t *__hlen);
-extern int  lpath_to_handle (char *__fspath, char *__path,
-                            void **__hanp, size_t *__hlen);
 extern int  path_to_fshandle (char *__path, void **__fshanp, size_t *__fshlen);
 extern int  handle_to_fshandle (void *__hanp, size_t __hlen, void **__fshanp,
                                size_t *__fshlen);
index 6c9380de3b29049fde4942b46bf5dc60f8bac6b8..6276797485ec12360f53af2ea772b4fb8b9e7ead 100644 (file)
@@ -110,30 +110,17 @@ path_to_handle(
        char            *path,          /* input,  path to convert */
        void            **hanp,         /* output, pointer to data */
        size_t          *hlen)          /* output, size of returned data */
-{
-       return lpath_to_handle(path, path, hanp, hlen);
-}
-
-/* Like path_to_handle, but reliable for paths which are either dangling
- * symlinks or symlinks whose targets are not in XFS filesystems.
- */
-int
-lpath_to_handle(
-       char            *fspath,        /* input,  path in filesystem */
-       char            *path,          /* input,  path to convert */
-       void            **hanp,         /* output, pointer to data */
-       size_t          *hlen)          /* output, size of returned data */
 {
        int             fd;
        int             result;
        comarg_t        obj;
 
-       fd = open(fspath, O_RDONLY);
+       fd = open(path, O_RDONLY);
        if (fd < 0)
                return -1;
 
        obj.path = path;
-       result = obj_to_handle(fspath, fd, XFS_IOC_PATH_TO_HANDLE,
+       result = obj_to_handle(path, fd, XFS_IOC_PATH_TO_HANDLE,
                                obj, hanp, hlen);
        close(fd);
        return result;
index fa96df52a372533b70a2319cc6acfeeffc6873ae..a80ea41fd192845bdc03b1e7044e5c0ec323db22 100644 (file)
@@ -9,15 +9,15 @@ LSRCFILES = README
 
 LTCOMMAND = xfs_repair
 
-HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
-       dinode.h dir.h dir2.h err_protos.h globals.h incore.h protos.h rt.h \
-       progress.h scan.h versions.h prefetch.h threads.h
+HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
+       dir2.h err_protos.h globals.h incore.h protos.h rt.h \
+       progress.h scan.h versions.h prefetch.h radix-tree.h threads.h
 
-CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
-       dino_chunks.c dinode.c dir.c dir2.c globals.c incore.c \
+CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
+       dinode.c dir.c dir2.c globals.c incore.c \
        incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
        phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
-       progress.c prefetch.c rt.c sb.c scan.c threads.c \
+       progress.c prefetch.c radix-tree.c rt.c sb.c scan.c threads.c \
        versions.c xfs_repair.c
 
 LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
@@ -32,7 +32,9 @@ include $(BUILDRULES)
 
 #
 # Tracing flags:
+# -DXR_BMAP_DBG                incore block bitmap debugging
 # -DXR_INODE_TRACE     inode processing
+# -DXR_BMAP_TRACE      bmap btree processing
 # -DXR_DIR_TRACE       directory processing
 # -DXR_DUP_TRACE       duplicate extent processing
 # -DXR_BCNT_TRACE      incore bcnt freespace btree building
index 79b9f79f4a8772c4be29d543e1d35453a7a0cb03..05d5da89b043fe4c8ae4f509b53340a41ba13c98 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2001,2005,2008 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
 #include "bmap.h"
 
 /*
- * Track the logical to physical block mapping for inodes.
- *
- * Repair only processes one inode at a given time per thread, and the
- * block map does not have to outlive the processing of a single inode.
- *
- * The combination of those factors means we can use pthreads thread-local
- * storage to store the block map, and we can re-use the allocation over
- * and over again.
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Append an extent to the block entry.
  */
+void
+blkent_append(
+       blkent_t        **entp,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       blkent_t        *ent;
+       size_t          size;
+       int             i;
 
-pthread_key_t  dblkmap_key;
-pthread_key_t  ablkmap_key;
+       ent = *entp;
+       size = BLKENT_SIZE(c + ent->nblks);
+       if ((*entp = ent = realloc(ent, size)) == NULL) {
+               do_warn(_("realloc failed in blkent_append (%u bytes)\n"),
+                       size);
+               return;
+       }
+       for (i = 0; i < c; i++)
+               ent->blks[ent->nblks + i] = b + i;
+       ent->nblks += c;
+}
+
+/*
+ * Make a new block entry.
+ */
+blkent_t *
+blkent_new(
+       xfs_dfiloff_t   o,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       blkent_t        *ent;
+       int             i;
+
+       if ((ent = malloc(BLKENT_SIZE(c))) == NULL) {
+               do_warn(_("malloc failed in blkent_new (%u bytes)\n"),
+                       BLKENT_SIZE(c));
+               return ent;
+       }
+       ent->nblks = c;
+       ent->startoff = o;
+       for (i = 0; i < c; i++)
+               ent->blks[i] = b + i;
+       return ent;
+}
 
+/*
+ * Prepend an extent to the block entry.
+ */
+void
+blkent_prepend(
+       blkent_t        **entp,
+       xfs_dfsbno_t    b,
+       xfs_dfilblks_t  c)
+{
+       int             i;
+       blkent_t        *newent;
+       blkent_t        *oldent;
+
+       oldent = *entp;
+       if ((newent = malloc(BLKENT_SIZE(oldent->nblks + c))) == NULL) {
+               do_warn(_("malloc failed in blkent_prepend (%u bytes)\n"),
+                       BLKENT_SIZE(oldent->nblks + c));
+               *entp = newent;
+               return;
+       }
+       newent->nblks = oldent->nblks + c;
+       newent->startoff = oldent->startoff - c;
+       for (i = 0; i < c; i++)
+               newent->blks[i] = b + c;
+       for (; i < oldent->nblks + c; i++)
+               newent->blks[i] = oldent->blks[i - c];
+       free(oldent);
+       *entp = newent;
+}
+
+/*
+ * Allocate a block map.
+ */
 blkmap_t *
 blkmap_alloc(
-       xfs_extnum_t    nex,
-       int             whichfork)
+       xfs_extnum_t    nex)
 {
-       pthread_key_t   key;
        blkmap_t        *blkmap;
 
-       ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
-
        if (nex < 1)
                nex = 1;
-
-       key = whichfork ? ablkmap_key : dblkmap_key;
-       blkmap = pthread_getspecific(key);
-       if (!blkmap || blkmap->naexts < nex) {
-               blkmap = realloc(blkmap, BLKMAP_SIZE(nex));
-               if (!blkmap) {
-                       do_warn(_("malloc failed in blkmap_alloc (%u bytes)\n"),
-                               BLKMAP_SIZE(nex));
-                       return NULL;
-               }
-               pthread_setspecific(key, blkmap);
-               blkmap->naexts = nex;
+       if ((blkmap = malloc(BLKMAP_SIZE(nex))) == NULL) {
+               do_warn(_("malloc failed in blkmap_alloc (%u bytes)\n"),
+                       BLKMAP_SIZE(nex));
+               return blkmap;
        }
-
-       blkmap->nexts = 0;
+       blkmap->naents = nex;
+       blkmap->nents = 0;
        return blkmap;
 }
 
@@ -71,7 +131,14 @@ void
 blkmap_free(
        blkmap_t        *blkmap)
 {
-       /* nothing to do! - keep the memory around for the next inode */
+       blkent_t        **entp;
+       xfs_extnum_t    i;
+
+       if (blkmap == NULL)
+               return;
+       for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++)
+               free(*entp);
+       free(blkmap);
 }
 
 /*
@@ -82,18 +149,20 @@ blkmap_get(
        blkmap_t        *blkmap,
        xfs_dfiloff_t   o)
 {
-       bmap_ext_t      *ext = blkmap->exts;
+       blkent_t        *ent;
+       blkent_t        **entp;
        int             i;
 
-       for (i = 0; i < blkmap->nexts; i++, ext++) {
-               if (o >= ext->startoff && o < ext->startoff + ext->blockcount)
-                       return ext->startblock + (o - ext->startoff);
+       for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++) {
+               ent = *entp;
+               if (o >= ent->startoff && o < ent->startoff + ent->nblks)
+                       return ent->blks[o - ent->startoff];
        }
        return NULLDFSBNO;
 }
 
 /*
- * Get a chunk of entries from a block map - only used for reading dirv2 blocks
+ * Get a chunk of entries from a block map.
  */
 int
 blkmap_getn(
@@ -103,62 +172,93 @@ blkmap_getn(
        bmap_ext_t      **bmpp,
        bmap_ext_t      *bmpp_single)
 {
-       bmap_ext_t      *bmp = NULL;
-       bmap_ext_t      *ext;
+       bmap_ext_t      *bmp;
+       blkent_t        *ent;
+       xfs_dfiloff_t   ento;
+       blkent_t        **entp;
        int             i;
        int             nex;
 
        if (nb == 1) {
-               /*
+               /* 
                 * in the common case, when mp->m_dirblkfsbs == 1,
                 * avoid additional malloc/free overhead
                 */
                bmpp_single->startblock = blkmap_get(blkmap, o);
-               goto single_ext;
+               bmpp_single->blockcount = 1;
+               bmpp_single->startoff = 0;
+               bmpp_single->flag = 0;
+               *bmpp = bmpp_single;
+               return (bmpp_single->startblock != NULLDFSBNO) ? 1 : 0;
        }
-       ext = blkmap->exts;
-       nex = 0;
-       for (i = 0; i < blkmap->nexts; i++, ext++) {
-
-               if (ext->startoff >= o + nb)
+       for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+            i < blkmap->nents;
+            i++, entp++) {
+               ent = *entp;
+               if (ent->startoff >= o + nb)
                        break;
-               if (ext->startoff + ext->blockcount <= o)
+               if (ent->startoff + ent->nblks <= o)
                        continue;
-
-               /*
-                * if all the requested blocks are in one extent (also common),
-                * use the bmpp_single option as well
-                */
-               if (!bmp && o >= ext->startoff &&
-                   o + nb <= ext->startoff + ext->blockcount) {
-                       bmpp_single->startblock =
-                                ext->startblock + (o - ext->startoff);
-                       goto single_ext;
+               for (ento = ent->startoff;
+                    ento < ent->startoff + ent->nblks && ento < o + nb;
+                    ento++) {
+                       if (ento < o)
+                               continue;
+                       if (bmp &&
+                           bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+                                   ento &&
+                           bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+                                   ent->blks[ento - ent->startoff])
+                               bmp[nex - 1].blockcount++;
+                       else {
+                               bmp = realloc(bmp, ++nex * sizeof(*bmp));
+                               if (bmp == NULL) {
+                                       do_warn(_("blkmap_getn realloc failed"
+                                               " (%u bytes)\n"),
+                                               nex * sizeof(*bmp));
+                                       continue;
+                               }
+                               bmp[nex - 1].startoff = ento;
+                               bmp[nex - 1].startblock =
+                                       ent->blks[ento - ent->startoff];
+                               bmp[nex - 1].blockcount = 1;
+                               bmp[nex - 1].flag = 0;
+                       }
                }
-
-               /*
-                * rare case - multiple extents for a single dir block
-                */
-               bmp = malloc(nb * sizeof(bmap_ext_t));
-               if (!bmp)
-                       do_error(_("blkmap_getn malloc failed (%u bytes)\n"),
-                                               nb * sizeof(bmap_ext_t));
-
-               bmp[nex].startblock = ext->startblock + (o - ext->startoff);
-               bmp[nex].blockcount = MIN(nb, ext->blockcount -
-                               (bmp[nex].startblock - ext->startblock));
-               o += bmp[nex].blockcount;
-               nb -= bmp[nex].blockcount;
-               nex++;
        }
        *bmpp = bmp;
        return nex;
+}
+
+/*
+ * Make a block map larger.
+ */
+void
+blkmap_grow(
+       blkmap_t        **blkmapp,
+       blkent_t        **entp,
+       blkent_t        *newent)
+{
+       blkmap_t        *blkmap;
+       size_t          size;
+       int             i;
+       int             idx;
 
-single_ext:
-       bmpp_single->blockcount = nb;
-       bmpp_single->startoff = 0;      /* not even used by caller! */
-       *bmpp = bmpp_single;
-       return (bmpp_single->startblock != NULLDFSBNO) ? 1 : 0;
+       blkmap = *blkmapp;
+       idx = (int)(entp - blkmap->ents);
+       if (blkmap->naents == blkmap->nents) {
+               size = BLKMAP_SIZE(blkmap->nents + 1);
+               if ((*blkmapp = blkmap = realloc(blkmap, size)) == NULL) {
+                       do_warn(_("realloc failed in blkmap_grow (%u bytes)\n"),
+                               size);
+                       return;
+               }
+               blkmap->naents++;
+       }
+       for (i = blkmap->nents; i > idx; i--)
+               blkmap->ents[i] = blkmap->ents[i - 1];
+       blkmap->ents[idx] = newent;
+       blkmap->nents++;
 }
 
 /*
@@ -168,12 +268,12 @@ xfs_dfiloff_t
 blkmap_last_off(
        blkmap_t        *blkmap)
 {
-       bmap_ext_t      *ext;
+       blkent_t        *ent;
 
-       if (!blkmap->nexts)
+       if (!blkmap->nents)
                return NULLDFILOFF;
-       ext = blkmap->exts + blkmap->nexts - 1;
-       return ext->startoff + ext->blockcount;
+       ent = blkmap->ents[blkmap->nents - 1];
+       return ent->startoff + ent->nblks;
 }
 
 /*
@@ -185,45 +285,73 @@ blkmap_next_off(
        xfs_dfiloff_t   o,
        int             *t)
 {
-       bmap_ext_t      *ext;
+       blkent_t        *ent;
+       blkent_t        **entp;
 
-       if (!blkmap->nexts)
+       if (!blkmap->nents)
                return NULLDFILOFF;
        if (o == NULLDFILOFF) {
                *t = 0;
-               return blkmap->exts[0].startoff;
+               ent = blkmap->ents[0];
+               return ent->startoff;
        }
-       ext = blkmap->exts + *t;
-       if (o < ext->startoff + ext->blockcount - 1)
+       entp = &blkmap->ents[*t];
+       ent = *entp;
+       if (o < ent->startoff + ent->nblks - 1)
                return o + 1;
-       if (*t >= blkmap->nexts - 1)
+       entp++;
+       if (entp >= &blkmap->ents[blkmap->nents])
                return NULLDFILOFF;
        (*t)++;
-       return ext[1].startoff;
+       ent = *entp;
+       return ent->startoff;
 }
 
 /*
- * Make a block map larger.
+ * Set a block value in a block map.
  */
-static blkmap_t *
-blkmap_grow(
-       blkmap_t        **blkmapp)
+void
+blkmap_set_blk(
+       blkmap_t        **blkmapp,
+       xfs_dfiloff_t   o,
+       xfs_dfsbno_t    b)
 {
-       pthread_key_t   key = dblkmap_key;
-       blkmap_t        *blkmap = *blkmapp;
+       blkmap_t        *blkmap;
+       blkent_t        *ent;
+       blkent_t        **entp;
+       blkent_t        *nextent;
 
-       if (pthread_getspecific(key) != blkmap) {
-               key = ablkmap_key;
-               ASSERT(pthread_getspecific(key) == blkmap);
+       blkmap = *blkmapp;
+       for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+               ent = *entp;
+               if (o < ent->startoff - 1) {
+                       ent = blkent_new(o, b, 1);
+                       blkmap_grow(blkmapp, entp, ent);
+                       return;
+               }
+               if (o == ent->startoff - 1) {
+                       blkent_prepend(entp, b, 1);
+                       return;
+               }
+               if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+                       ent->blks[o - ent->startoff] = b;
+                       return;
+               }
+               if (o > ent->startoff + ent->nblks)
+                       continue;
+               blkent_append(entp, b, 1);
+               if (entp == &blkmap->ents[blkmap->nents - 1])
+                       return;
+               ent = *entp;
+               nextent = entp[1];
+               if (ent->startoff + ent->nblks < nextent->startoff)
+                       return;
+               blkent_append(entp, nextent->blks[0], nextent->nblks);
+               blkmap_shrink(blkmap, &entp[1]);
+               return;
        }
-
-       blkmap->naexts += 4;
-       blkmap = realloc(blkmap, BLKMAP_SIZE(blkmap->naexts));
-       if (blkmap == NULL)
-               do_error(_("realloc failed in blkmap_grow\n"));
-       *blkmapp = blkmap;
-       pthread_setspecific(key, blkmap);
-       return blkmap;
+       ent = blkent_new(o, b, 1);
+       blkmap_grow(blkmapp, entp, ent);
 }
 
 /*
@@ -236,23 +364,46 @@ blkmap_set_ext(
        xfs_dfsbno_t    b,
        xfs_dfilblks_t  c)
 {
-       blkmap_t        *blkmap = *blkmapp;
+       blkmap_t        *blkmap;
+       blkent_t        *ent;
+       blkent_t        **entp;
        xfs_extnum_t    i;
 
-       if (blkmap->nexts == blkmap->naexts)
-               blkmap = blkmap_grow(blkmapp);
-
-       for (i = 0; i < blkmap->nexts; i++) {
-               if (blkmap->exts[i].startoff > o) {
-                       memmove(blkmap->exts + i + 1,
-                               blkmap->exts + i,
-                               sizeof(bmap_ext_t) * (blkmap->nexts - i));
-                       break;
-               }
+       blkmap = *blkmapp;
+       if (!blkmap->nents) {
+               blkmap->ents[0] = blkent_new(o, b, c);
+               blkmap->nents = 1;
+               return;
+       }
+       entp = &blkmap->ents[blkmap->nents - 1];
+       ent = *entp;
+       if (ent->startoff + ent->nblks == o) {
+               blkent_append(entp, b, c);
+               return;
+       }
+       if (ent->startoff + ent->nblks < o) {
+               ent = blkent_new(o, b, c);
+               blkmap_grow(blkmapp, &blkmap->ents[blkmap->nents], ent);
+               return;
        }
+       for (i = 0; i < c; i++)
+               blkmap_set_blk(blkmapp, o + i, b + i);
+}
+
+/*
+ * Make a block map smaller.
+ */
+void
+blkmap_shrink(
+       blkmap_t        *blkmap,
+       blkent_t        **entp)
+{
+       int             i;
+       int             idx;
 
-       blkmap->exts[i].startoff = o;
-       blkmap->exts[i].startblock = b;
-       blkmap->exts[i].blockcount = c;
-       blkmap->nexts++;
+       free(*entp);
+       idx = (int)(entp - blkmap->ents);
+       for (i = idx + 1; i < blkmap->nents; i++)
+               blkmap->ents[i] = blkmap->ents[i - 1];
+       blkmap->nents--;
 }
index 58abf95fdab794282d226d19c1d37c5a9d186790..eba1799f5d0ab534c046a2343e1031f055f9aa8d 100644 (file)
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#ifndef _XFS_REPAIR_BMAP_H
-#define _XFS_REPAIR_BMAP_H
+/*
+ * Block mapping code taken from xfs_db.
+ */
 
 /*
- * Extent descriptor.
+ * Block map entry.
  */
-typedef struct bmap_ext {
+typedef struct blkent {
        xfs_dfiloff_t   startoff;
-       xfs_dfsbno_t    startblock;
-       xfs_dfilblks_t  blockcount;
-} bmap_ext_t;
+       xfs_dfilblks_t  nblks;
+       xfs_dfsbno_t    blks[1];
+} blkent_t;
+#define        BLKENT_SIZE(n)  \
+       (offsetof(blkent_t, blks) + (sizeof(xfs_dfsbno_t) * (n)))
 
 /*
  * Block map.
  */
 typedef        struct blkmap {
-       int             naexts;
-       int             nexts;
-       bmap_ext_t      exts[1];
+       int             naents;
+       int             nents;
+       blkent_t        *ents[1];
 } blkmap_t;
-
 #define        BLKMAP_SIZE(n)  \
-       (offsetof(blkmap_t, exts) + (sizeof(bmap_ext_t) * (n)))
-
-blkmap_t       *blkmap_alloc(xfs_extnum_t nex, int whichfork);
-void           blkmap_free(blkmap_t *blkmap);
+       (offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
 
-void           blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
-                              xfs_dfsbno_t b, xfs_dfilblks_t c);
+/*
+ * Extent descriptor.
+ */
+typedef struct bmap_ext {
+       xfs_dfiloff_t   startoff;
+       xfs_dfsbno_t    startblock;
+       xfs_dfilblks_t  blockcount;
+       int             flag;
+} bmap_ext_t;
 
+void           blkent_append(blkent_t **entp, xfs_dfsbno_t b,
+                             xfs_dfilblks_t c);
+blkent_t       *blkent_new(xfs_dfiloff_t o, xfs_dfsbno_t b, xfs_dfilblks_t c);
+void           blkent_prepend(blkent_t **entp, xfs_dfsbno_t b,
+                              xfs_dfilblks_t c);
+blkmap_t       *blkmap_alloc(xfs_extnum_t);
+void           blkmap_free(blkmap_t *blkmap);
 xfs_dfsbno_t   blkmap_get(blkmap_t *blkmap, xfs_dfiloff_t o);
 int            blkmap_getn(blkmap_t *blkmap, xfs_dfiloff_t o,
-                           xfs_dfilblks_t nb, bmap_ext_t **bmpp,
+                           xfs_dfilblks_t nb, bmap_ext_t **bmpp, 
                            bmap_ext_t *bmpp_single);
+void           blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+                           blkent_t *newent);
 xfs_dfiloff_t  blkmap_last_off(blkmap_t *blkmap);
 xfs_dfiloff_t  blkmap_next_off(blkmap_t *blkmap, xfs_dfiloff_t o, int *t);
-
-#endif /* _XFS_REPAIR_BMAP_H */
+void           blkmap_set_blk(blkmap_t **blkmapp, xfs_dfiloff_t o,
+                              xfs_dfsbno_t b);
+void           blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
+                              xfs_dfsbno_t b, xfs_dfilblks_t c);
+void           blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
diff --git a/repair/btree.c b/repair/btree.c
deleted file mode 100644 (file)
index f91f96b..0000000
+++ /dev/null
@@ -1,1234 +0,0 @@
-/*
- * Copyright (c) 2007, Silicon Graphics, Inc. Barry Naujok <bnaujok@sgi.com>
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <libxfs.h>
-#include "btree.h"
-
-
-#define BTREE_KEY_MAX          7
-#define BTREE_KEY_MIN          (BTREE_KEY_MAX / 2)
-
-#define BTREE_PTR_MAX          (BTREE_KEY_MAX + 1)
-
-struct btree_node {
-       unsigned long           num_keys;
-       unsigned long           keys[BTREE_KEY_MAX];
-       struct btree_node *     ptrs[BTREE_PTR_MAX];
-};
-
-struct btree_cursor {
-       struct btree_node       *node;
-       int                     index;
-};
-
-struct btree_root {
-       struct btree_node       *root_node;
-       struct btree_cursor     *cursor;        /* track path to end leaf */
-       int                     height;
-       /* lookup cache */
-       int                     keys_valid;     /* set if the cache is valid */
-       unsigned long           cur_key;
-       unsigned long           next_key;
-       void                    *next_value;
-       unsigned long           prev_key;
-       void                    *prev_value;
-#ifdef BTREE_STATS
-       struct btree_stats {
-               unsigned long   num_items;
-               unsigned long   max_items;
-               int             alloced;
-               int             cache_hits;
-               int             cache_misses;
-               int             lookup;
-               int             find;
-               int             key_update;
-               int             value_update;
-               int             insert;
-               int             delete;
-               int             inc_height;
-               int             dec_height;
-               int             shift_prev;
-               int             shift_next;
-               int             split;
-               int             merge_prev;
-               int             merge_next;
-               int             balance_prev;
-               int             balance_next;
-       } stats;
-#endif
-};
-
-
-static struct btree_node *
-btree_node_alloc(void)
-{
-       return calloc(1, sizeof(struct btree_node));
-}
-
-static void
-btree_node_free(
-       struct btree_node       *node)
-{
-       free(node);
-}
-
-static void
-btree_free_nodes(
-       struct btree_node       *node,
-       int                     level)
-{
-       int                     i;
-
-       if (level)
-               for (i = 0; i <= node->num_keys; i++)
-                       btree_free_nodes(node->ptrs[i], level - 1);
-       btree_node_free(node);
-}
-
-static void
-__btree_init(
-       struct btree_root       *root)
-{
-       memset(root, 0, sizeof(struct btree_root));
-       root->height = 1;
-       root->cursor = calloc(1, sizeof(struct btree_cursor));
-       root->root_node = btree_node_alloc();
-       ASSERT(root->root_node);
-#ifdef BTREE_STATS
-       root->stats.max_items = 1;
-       root->stats.alloced += 1;
-#endif
-}
-
-static void
-__btree_free(
-       struct btree_root       *root)
-{
-       btree_free_nodes(root->root_node, root->height - 1);
-       free(root->cursor);
-       root->height = 0;
-       root->cursor = NULL;
-       root->root_node = NULL;
-}
-
-void
-btree_init(
-       struct btree_root       **root)
-{
-       *root = calloc(1, sizeof(struct btree_root));
-       __btree_init(*root);
-}
-
-void
-btree_clear(
-       struct btree_root       *root)
-{
-       __btree_free(root);
-       __btree_init(root);
-}
-
-void
-btree_destroy(
-       struct btree_root       *root)
-{
-       __btree_free(root);
-       free(root);
-}
-
-int
-btree_is_empty(
-       struct btree_root       *root)
-{
-       return root->root_node->num_keys == 0;
-}
-
-static inline void
-btree_invalidate_cursor(
-       struct btree_root       *root)
-{
-       root->cursor[0].node = NULL;
-       root->keys_valid = 0;
-}
-
-static inline unsigned long
-btree_key_of_cursor(
-       struct btree_cursor     *cursor,
-       int                     height)
-{
-       while (cursor->node->num_keys == cursor->index && --height > 0)
-               cursor++;
-       return cursor->node->keys[cursor->index];
-}
-
-static void *
-btree_get_prev(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       struct btree_cursor     *cur = root->cursor;
-       int                     level = 0;
-       struct btree_node       *node;
-
-       if (cur->index > 0) {
-               if (key)
-                       *key = cur->node->keys[cur->index - 1];
-               return cur->node->ptrs[cur->index - 1];
-       }
-
-       /* else need to go up and back down the tree to find the previous */
-
-       while (cur->index == 0) {
-               if (++level == root->height)
-                       return NULL;
-               cur++;
-       }
-
-       /* the key is in the current level */
-       if (key)
-               *key = cur->node->keys[cur->index - 1];
-
-       /* descend back down the right side to get the pointer */
-       node = cur->node->ptrs[cur->index - 1];
-       while (level--)
-               node = node->ptrs[node->num_keys];
-       return node;
-}
-
-static void *
-btree_get_next(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       struct btree_cursor     *cur = root->cursor;
-       int                     level = 0;
-       struct btree_node       *node;
-
-       while (cur->index == cur->node->num_keys) {
-               if (++level == root->height)
-                       return NULL;
-               cur++;
-       }
-       if (level == 0) {
-               if (key) {
-                       cur->index++;
-                       *key = btree_key_of_cursor(cur, root->height);
-                       cur->index--;
-               }
-               return cur->node->ptrs[cur->index + 1];
-       }
-
-       node = cur->node->ptrs[cur->index + 1];
-       while (--level > 0)
-               node = node->ptrs[0];
-       if (key)
-               *key = node->keys[0];
-       return node->ptrs[0];
-}
-
-/*
- * Lookup/Search functions
- */
-
-static int
-btree_do_search(
-       struct btree_root       *root,
-       unsigned long           key)
-{
-       unsigned long           k = 0;
-       struct btree_cursor     *cur = root->cursor + root->height;
-       struct btree_node       *node = root->root_node;
-       int                     height = root->height;
-       int                     key_found = 0;
-       int                     i;
-
-       while (--height >= 0) {
-               cur--;
-               for (i = 0; i < node->num_keys; i++)
-                       if (node->keys[i] >= key) {
-                               k = node->keys[i];
-                               key_found = 1;
-                               break;
-                       }
-               cur->node = node;
-               cur->index = i;
-               node = node->ptrs[i];
-       }
-       root->keys_valid = key_found;
-       if (!key_found)
-               return 0;
-
-       root->cur_key = k;
-       root->next_value = NULL;        /* do on-demand next value lookup */
-       root->prev_value = btree_get_prev(root, &root->prev_key);
-       return 1;
-}
-
-static int
-btree_search(
-       struct btree_root       *root,
-       unsigned long           key)
-{
-       if (root->keys_valid && key <= root->cur_key &&
-                               (!root->prev_value || key > root->prev_key)) {
-#ifdef BTREE_STATS
-               root->stats.cache_hits++;
-#endif
-               return 1;
-       }
-#ifdef BTREE_STATS
-       root->stats.cache_misses++;
-#endif
-       return btree_do_search(root, key);
-}
-
-void *
-btree_find(
-       struct btree_root       *root,
-       unsigned long           key,
-       unsigned long           *actual_key)
-{
-#ifdef BTREE_STATS
-       root->stats.find += 1;
-#endif
-       if (!btree_search(root, key))
-               return NULL;
-
-       if (actual_key)
-               *actual_key = root->cur_key;
-       return root->cursor->node->ptrs[root->cursor->index];
-}
-
-void *
-btree_lookup(
-       struct btree_root       *root,
-       unsigned long           key)
-{
-#ifdef BTREE_STATS
-       root->stats.lookup += 1;
-#endif
-       if (!btree_search(root, key) || root->cur_key != key)
-               return NULL;
-       return root->cursor->node->ptrs[root->cursor->index];
-}
-
-void *
-btree_peek_prev(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       if (!root->keys_valid)
-               return NULL;
-       if (key)
-               *key = root->prev_key;
-       return root->prev_value;
-}
-
-void *
-btree_peek_next(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       if (!root->keys_valid)
-               return NULL;
-       if (!root->next_value)
-               root->next_value = btree_get_next(root, &root->next_key);
-       if (key)
-               *key = root->next_key;
-       return root->next_value;
-}
-
-static void *
-btree_move_cursor_to_next(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       struct btree_cursor     *cur = root->cursor;
-       int                     level = 0;
-
-       while (cur->index == cur->node->num_keys) {
-               if (++level == root->height)
-                       return NULL;
-               cur++;
-       }
-       cur->index++;
-       if (level == 0) {
-               if (key)
-                       *key = btree_key_of_cursor(cur, root->height);
-               return cur->node->ptrs[cur->index];
-       }
-
-       while (--level >= 0) {
-               root->cursor[level].node = cur->node->ptrs[cur->index];
-               root->cursor[level].index = 0;
-               cur--;
-       }
-       if (key)
-               *key = cur->node->keys[0];
-       return cur->node->ptrs[0];
-}
-
-void *
-btree_lookup_next(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       void                    *value;
-
-       if (!root->keys_valid)
-               return NULL;
-
-       root->prev_key = root->cur_key;
-       root->prev_value = root->cursor->node->ptrs[root->cursor->index];
-
-       value = btree_move_cursor_to_next(root, &root->cur_key);
-       if (!value) {
-               btree_invalidate_cursor(root);
-               return NULL;
-       }
-       root->next_value = NULL;        /* on-demand next value fetch */
-       if (key)
-               *key = root->cur_key;
-       return value;
-}
-
-static void *
-btree_move_cursor_to_prev(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       struct btree_cursor     *cur = root->cursor;
-       int                     level = 0;
-
-       while (cur->index == 0) {
-               if (++level == root->height)
-                       return NULL;
-               cur++;
-       }
-       cur->index--;
-       if (key)        /* the key is in the current level */
-               *key = cur->node->keys[cur->index];
-       while (level > 0) {
-               level--;
-               root->cursor[level].node = cur->node->ptrs[cur->index];
-               root->cursor[level].index = root->cursor[level].node->num_keys;
-               cur--;
-       }
-       return cur->node->ptrs[cur->index];
-}
-
-void *
-btree_lookup_prev(
-       struct btree_root       *root,
-       unsigned long           *key)
-{
-       void                    *value;
-
-       if (!root->keys_valid)
-               return NULL;
-
-       value = btree_move_cursor_to_prev(root, &root->cur_key);
-       if (!value)
-               return NULL;
-       root->prev_value = btree_get_prev(root, &root->prev_key);
-       root->next_value = NULL;        /* on-demand next value fetch */
-       if (key)
-               *key = root->cur_key;
-       return value;
-}
-
-void *
-btree_uncached_lookup(
-       struct btree_root       *root,
-       unsigned long           key)
-{
-       /* cursor-less (ie. uncached) lookup */
-       int                     height = root->height - 1;
-       struct btree_node       *node = root->root_node;
-       int                     i;
-       int                     key_found = 0;
-
-       while (height >= 0) {
-               for (i = 0; i < node->num_keys; i++)
-                       if (node->keys[i] >= key) {
-                               key_found = node->keys[i] == key;
-                               break;
-                       }
-               node = node->ptrs[i];
-               height--;
-       }
-       return key_found ? node : NULL;
-}
-
-/* Update functions */
-
-static inline void
-btree_update_node_key(
-       struct btree_root       *root,
-       struct btree_cursor     *cursor,
-       int                     level,
-       unsigned long           new_key)
-{
-       int                     i;
-
-#ifdef BTREE_STATS
-       root->stats.key_update += 1;
-#endif
-
-       cursor += level;
-       for (i = level; i < root->height; i++) {
-               if (cursor->index < cursor->node->num_keys) {
-                       cursor->node->keys[cursor->index] = new_key;
-                       break;
-               }
-               cursor++;
-       }
-}
-
-int
-btree_update_key(
-       struct btree_root       *root,
-       unsigned long           old_key,
-       unsigned long           new_key)
-{
-       if (!btree_search(root, old_key) || root->cur_key != old_key)
-               return ENOENT;
-
-       if (root->next_value && new_key >= root->next_key)
-               return EINVAL;
-
-       if (root->prev_value && new_key <= root->prev_key)
-               return EINVAL;
-
-       btree_update_node_key(root, root->cursor, 0, new_key);
-
-       return 0;
-}
-
-int
-btree_update_value(
-       struct btree_root       *root,
-       unsigned long           key,
-       void                    *new_value)
-{
-       if (!new_value)
-               return EINVAL;
-
-       if (!btree_search(root, key) || root->cur_key != key)
-               return ENOENT;
-
-#ifdef BTREE_STATS
-       root->stats.value_update += 1;
-#endif
-       root->cursor->node->ptrs[root->cursor->index] = new_value;
-
-       return 0;
-}
-
-/*
- * Cursor modification functions - used for inserting and deleting
- */
-
-static struct btree_cursor *
-btree_copy_cursor_prev(
-       struct btree_root       *root,
-       struct btree_cursor     *dest_cursor,
-       int                     level)
-{
-       struct btree_cursor     *src_cur = root->cursor + level;
-       struct btree_cursor     *dst_cur;
-       int                     l = level;
-       int                     i;
-
-       if (level >= root->height)
-               return NULL;
-
-       while (src_cur->index == 0) {
-               if (++l >= root->height)
-                       return NULL;
-               src_cur++;
-       }
-       for (i = l; i < root->height; i++)
-               dest_cursor[i] = *src_cur++;
-
-       dst_cur = dest_cursor + l;
-       dst_cur->index--;
-       while (l-- >= level) {
-               dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
-               dest_cursor[l].index = dest_cursor[l].node->num_keys;
-               dst_cur--;
-       }
-       return dest_cursor;
-}
-
-static struct btree_cursor *
-btree_copy_cursor_next(
-       struct btree_root       *root,
-       struct btree_cursor     *dest_cursor,
-       int                     level)
-{
-       struct btree_cursor     *src_cur = root->cursor + level;
-       struct btree_cursor     *dst_cur;
-       int                     l = level;
-       int                     i;
-
-       if (level >= root->height)
-               return NULL;
-
-       while (src_cur->index == src_cur->node->num_keys) {
-               if (++l >= root->height)
-                       return NULL;
-               src_cur++;
-       }
-       for (i = l; i < root->height; i++)
-               dest_cursor[i] = *src_cur++;
-
-       dst_cur = dest_cursor + l;
-       dst_cur->index++;
-       while (l-- >= level) {
-               dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
-               dest_cursor[l].index = 0;
-               dst_cur--;
-       }
-       return dest_cursor;
-}
-
-/*
- * Shift functions
- *
- * Tries to move items in the current leaf to its sibling if it has space.
- * Used in both insert and delete functions.
- * Returns the number of items shifted.
- */
-
-static int
-btree_shift_to_prev(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *prev_cursor,
-       int                     num_children)
-{
-       struct btree_node       *node;
-       struct btree_node       *prev_node;
-       int                     num_remain;     /* # of keys left in "node" */
-       unsigned long           key;
-       int                     i;
-
-       if (!prev_cursor || !num_children)
-               return 0;
-
-       prev_node = prev_cursor[level].node;
-       node = root->cursor[level].node;
-
-       ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
-
-       if ((prev_node->num_keys + num_children) > BTREE_KEY_MAX)
-               return 0;
-
-#ifdef BTREE_STATS
-       root->stats.shift_prev += 1;
-#endif
-
-       num_remain = node->num_keys - num_children;
-       ASSERT(num_remain == -1 || num_remain >= BTREE_KEY_MIN);
-
-       /* shift parent keys around */
-       level++;
-       if (num_remain > 0)
-               key = node->keys[num_children - 1];
-       else
-               key = btree_key_of_cursor(root->cursor + level,
-                                               root->height - level);
-       while (prev_cursor[level].index == prev_cursor[level].node->num_keys) {
-               level++;
-               ASSERT(level < root->height);
-       }
-       prev_node->keys[prev_node->num_keys] =
-                       prev_cursor[level].node->keys[prev_cursor[level].index];
-       prev_cursor[level].node->keys[prev_cursor[level].index] = key;
-
-       /* copy pointers and keys to the end of the prev node */
-       for (i = 0; i < num_children - 1; i++) {
-               prev_node->keys[prev_node->num_keys + 1 + i] = node->keys[i];
-               prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
-       }
-       prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
-       prev_node->num_keys += num_children;
-
-       /* move remaining pointers/keys to start of node */
-       if (num_remain >= 0) {
-               for (i = 0; i < num_remain; i++) {
-                       node->keys[i] = node->keys[num_children + i];
-                       node->ptrs[i] = node->ptrs[num_children + i];
-               }
-               node->ptrs[i] = node->ptrs[num_children + i];
-               node->num_keys = num_remain;
-       } else
-               node->num_keys = 0;
-
-       return num_children;
-}
-
-static int
-btree_shift_to_next(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *next_cursor,
-       int                     num_children)
-{
-       struct btree_node       *node;
-       struct btree_node       *next_node;
-       int                     num_remain;     /* # of children left in node */
-       int                     i;
-
-       if (!next_cursor || !num_children)
-               return 0;
-
-       node = root->cursor[level].node;
-       next_node = next_cursor[level].node;
-
-       ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
-
-       if ((next_node->num_keys + num_children) > BTREE_KEY_MAX)
-               return 0;
-
-       num_remain = node->num_keys + 1 - num_children;
-       ASSERT(num_remain == 0 || num_remain > BTREE_KEY_MIN);
-
-#ifdef BTREE_STATS
-       root->stats.shift_next += 1;
-#endif
-
-       /* make space for "num_children" items at beginning of next-leaf */
-       i = next_node->num_keys;
-       next_node->ptrs[num_children + i] = next_node->ptrs[i];
-       while (--i >= 0) {
-               next_node->keys[num_children + i] = next_node->keys[i];
-               next_node->ptrs[num_children + i] = next_node->ptrs[i];
-       }
-
-       /* update keys in parent and next node from parent */
-       do {
-               level++;
-               ASSERT(level < root->height);
-       } while (root->cursor[level].index == root->cursor[level].node->num_keys);
-
-       next_node->keys[num_children - 1] =
-               root->cursor[level].node->keys[root->cursor[level].index];
-       root->cursor[level].node->keys[root->cursor[level].index] =
-               node->keys[node->num_keys - num_children];
-
-       /* copy last "num_children" items from node into start of next-node */
-       for (i = 0; i < num_children - 1; i++) {
-               next_node->keys[i] = node->keys[num_remain + i];
-               next_node->ptrs[i] = node->ptrs[num_remain + i];
-       }
-       next_node->ptrs[i] = node->ptrs[num_remain + i];
-       next_node->num_keys += num_children;
-
-       if (num_remain > 0)
-               node->num_keys -= num_children;
-       else
-               node->num_keys = 0;
-
-       return num_children;
-}
-
-/*
- * Insertion functions
- */
-
-static struct btree_node *
-btree_increase_height(
-       struct btree_root       *root)
-{
-       struct btree_node       *new_root;
-       struct btree_cursor     *new_cursor;
-
-       new_cursor = realloc(root->cursor, (root->height + 1) *
-                               sizeof(struct btree_cursor));
-       if (!new_cursor)
-               return NULL;
-       root->cursor = new_cursor;
-
-       new_root = btree_node_alloc();
-       if (!new_root)
-               return NULL;
-
-#ifdef BTREE_STATS
-       root->stats.alloced += 1;
-       root->stats.inc_height += 1;
-       root->stats.max_items *= BTREE_PTR_MAX;
-#endif
-
-       new_root->ptrs[0] = root->root_node;
-       root->root_node = new_root;
-
-       root->cursor[root->height].node = new_root;
-       root->cursor[root->height].index = 0;
-
-       root->height++;
-
-       return new_root;
-}
-
-static int
-btree_insert_item(
-       struct btree_root       *root,
-       int                     level,
-       unsigned long           key,
-       void                    *value);
-
-
-static struct btree_node *
-btree_split(
-       struct btree_root       *root,
-       int                     level,
-       unsigned long           key,
-       int                     *index)
-{
-       struct btree_node       *node = root->cursor[level].node;
-       struct btree_node       *new_node;
-       int                     i;
-
-       new_node = btree_node_alloc();
-       if (!new_node)
-               return NULL;
-
-       if (btree_insert_item(root, level + 1, node->keys[BTREE_KEY_MIN],
-                                                       new_node) != 0) {
-               btree_node_free(new_node);
-               return NULL;
-       }
-
-#ifdef BTREE_STATS
-       root->stats.alloced += 1;
-       root->stats.split += 1;
-#endif
-
-       for (i = 0; i < BTREE_KEY_MAX - BTREE_KEY_MIN - 1; i++) {
-               new_node->keys[i] = node->keys[BTREE_KEY_MIN + 1 + i];
-               new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
-       }
-       new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
-       new_node->num_keys = BTREE_KEY_MAX - BTREE_KEY_MIN - 1;
-
-       node->num_keys = BTREE_KEY_MIN;
-       if (key < node->keys[BTREE_KEY_MIN])
-               return node;    /* index doesn't change */
-
-       /* insertion point is in new node... */
-       *index -= BTREE_KEY_MIN + 1;
-       return new_node;
-}
-
-static int
-btree_insert_shift_to_prev(
-       struct btree_root       *root,
-       int                     level,
-       int                     *index)
-{
-       struct btree_cursor     tmp_cursor[root->height];
-       int                     n;
-
-       if (*index <= 0)
-               return -1;
-
-       if (!btree_copy_cursor_prev(root, tmp_cursor, level + 1))
-               return -1;
-
-       n = MIN(*index, (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
-       if (!n || !btree_shift_to_prev(root, level, tmp_cursor, n))
-               return -1;
-
-       *index -= n;
-       return 0;
-}
-
-static int
-btree_insert_shift_to_next(
-       struct btree_root       *root,
-       int                     level,
-       int                     *index)
-{
-       struct btree_cursor     tmp_cursor[root->height];
-       int                     n;
-
-       if (*index >= BTREE_KEY_MAX)
-               return -1;
-
-       if (!btree_copy_cursor_next(root, tmp_cursor, level + 1))
-               return -1;
-
-       n = MIN(BTREE_KEY_MAX - *index,
-               (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
-       if (!n || !btree_shift_to_next(root, level, tmp_cursor, n))
-               return -1;
-       return 0;
-}
-
-static int
-btree_insert_item(
-       struct btree_root       *root,
-       int                     level,
-       unsigned long           key,
-       void                    *value)
-{
-       struct btree_node       *node = root->cursor[level].node;
-       int                     index = root->cursor[level].index;
-       int                     i;
-
-       if (node->num_keys == BTREE_KEY_MAX) {
-               if (btree_insert_shift_to_prev(root, level, &index) == 0)
-                       goto insert;
-               if (btree_insert_shift_to_next(root, level, &index) == 0)
-                       goto insert;
-               if (level == root->height - 1) {
-                       if (!btree_increase_height(root))
-                               return ENOMEM;
-               }
-               node = btree_split(root, level, key, &index);
-               if (!node)
-                       return ENOMEM;
-       }
-insert:
-       ASSERT(index <= node->num_keys);
-
-       i = node->num_keys;
-       node->ptrs[i + 1] = node->ptrs[i];
-       while (--i >= index) {
-               node->keys[i + 1] = node->keys[i];
-               node->ptrs[i + 1] = node->ptrs[i];
-       }
-
-       node->num_keys++;
-       node->keys[index] = key;
-
-       if (level == 0)
-               node->ptrs[index] = value;
-       else
-               node->ptrs[index + 1] = value;
-
-       return 0;
-}
-
-
-
-int
-btree_insert(
-       struct btree_root       *root,
-       unsigned long           key,
-       void                    *value)
-{
-       int                     result;
-
-       if (!value)
-               return EINVAL;
-
-       if (btree_search(root, key) && root->cur_key == key)
-               return EEXIST;
-
-#ifdef BTREE_STATS
-       root->stats.insert += 1;
-       root->stats.num_items += 1;
-#endif
-
-       result = btree_insert_item(root, 0, key, value);
-
-       btree_invalidate_cursor(root);
-
-       return result;
-}
-
-
-/*
- * Deletion functions
- *
- * Rather more complicated as deletions has 4 ways to go once a node
- * ends up with less than the minimum number of keys:
- *   - move remainder to previous node
- *   - move remainder to next node
- *       (both will involve a parent deletion which may recurse)
- *   - balance by moving some items from previous node
- *   - balance by moving some items from next node
- */
-
-static void
-btree_decrease_height(
-       struct btree_root       *root)
-{
-       struct btree_node       *old_root = root->root_node;
-
-       ASSERT(old_root->num_keys == 0);
-
-#ifdef BTREE_STATS
-       root->stats.alloced -= 1;
-       root->stats.dec_height += 1;
-       root->stats.max_items /= BTREE_PTR_MAX;
-#endif
-       root->root_node = old_root->ptrs[0];
-       btree_node_free(old_root);
-       root->height--;
-}
-
-static int
-btree_merge_with_prev(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *prev_cursor)
-{
-       if (!prev_cursor)
-               return 0;
-
-       if (!btree_shift_to_prev(root, level, prev_cursor,
-                                       root->cursor[level].node->num_keys + 1))
-               return 0;
-
-#ifdef BTREE_STATS
-       root->stats.merge_prev += 1;
-#endif
-       return 1;
-}
-
-static int
-btree_merge_with_next(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *next_cursor)
-{
-       if (!next_cursor)
-               return 0;
-
-       if (!btree_shift_to_next(root, level, next_cursor,
-                                       root->cursor[level].node->num_keys + 1))
-               return 0;
-
-#ifdef BTREE_STATS
-       root->stats.merge_next += 1;
-#endif
-       return 1;
-}
-
-static int
-btree_balance_with_prev(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *prev_cursor)
-{
-       struct btree_cursor     *root_cursor = root->cursor;
-
-       if (!prev_cursor)
-               return 0;
-       ASSERT(prev_cursor[level].node->num_keys > BTREE_KEY_MIN);
-
-#ifdef BTREE_STATS
-       root->stats.balance_prev += 1;
-#endif
-       /*
-        * Move some nodes from the prev node into the current node.
-        * As the shift operation is a right shift and is relative to
-        * the root cursor, make the root cursor the prev cursor and
-        * pass in the root cursor as the next cursor.
-        */
-
-       root->cursor = prev_cursor;
-       if (!btree_shift_to_next(root, level, root_cursor,
-               (prev_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
-                       abort();
-       root->cursor = root_cursor;
-
-       return 1;
-}
-
-static int
-btree_balance_with_next(
-       struct btree_root       *root,
-       int                     level,
-       struct btree_cursor     *next_cursor)
-{
-       struct btree_cursor     *root_cursor = root->cursor;
-
-       if (!next_cursor)
-               return 0;
-       assert(next_cursor[level].node->num_keys > BTREE_KEY_MIN);
-
-#ifdef btree_stats
-       root->stats.balance_next += 1;
-#endif
-       /*
-        * move some nodes from the next node into the current node.
-        * as the shift operation is a left shift and is relative to
-        * the root cursor, make the root cursor the next cursor and
-        * pass in the root cursor as the prev cursor.
-        */
-
-       root->cursor = next_cursor;
-       if (!btree_shift_to_prev(root, level, root_cursor,
-               (next_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
-                       abort();
-       root->cursor = root_cursor;
-
-       return 1;
-
-}
-
-static void
-btree_delete_key(
-       struct btree_root       *root,
-       int                     level);
-
-/*
- * btree_delete_node:
- *
- * Return 0 if it's done or 1 if the next level needs to be collapsed
- */
-static void
-btree_delete_node(
-       struct btree_root       *root,
-       int                     level)
-{
-       struct btree_cursor     prev_cursor[root->height];
-       struct btree_cursor     next_cursor[root->height];
-       struct btree_cursor     *pc;
-       struct btree_cursor     *nc;
-
-       /*
-        * the node has underflowed, grab or merge keys/items from a
-        * neighbouring node.
-        */
-
-       if (level == root->height - 1) {
-               if (level > 0 && root->root_node->num_keys == 0)
-                       btree_decrease_height(root);
-               return;
-       }
-
-       pc = btree_copy_cursor_prev(root, prev_cursor, level + 1);
-       if (!btree_merge_with_prev(root, level, pc)) {
-               nc = btree_copy_cursor_next(root, next_cursor, level + 1);
-               if (!btree_merge_with_next(root, level, nc)) {
-                       /* merging failed, try redistrubution */
-                       if (!btree_balance_with_prev(root, level, pc) &&
-                           !btree_balance_with_next(root, level, nc))
-                               abort();
-                       return; /* when balancing, then the node isn't freed */
-               }
-       }
-
-#ifdef BTREE_STATS
-       root->stats.alloced -= 1;
-#endif
-       btree_node_free(root->cursor[level].node);
-
-       btree_delete_key(root, level + 1);
-}
-
-static void
-btree_delete_key(
-       struct btree_root       *root,
-       int                     level)
-{
-       struct btree_node       *node = root->cursor[level].node;
-       int                     index = root->cursor[level].index;
-
-       node->num_keys--;
-       if (index <= node->num_keys) {
-               /*
-                * if not deleting the last item, shift higher items down
-                * to cover the item being deleted
-                */
-               while (index < node->num_keys) {
-                       node->keys[index] = node->keys[index + 1];
-                       node->ptrs[index] = node->ptrs[index + 1];
-                       index++;
-               }
-               node->ptrs[index] = node->ptrs[index + 1];
-       } else {
-               /*
-                * else update the associated parent key as the last key
-                * in the leaf has changed
-                */
-               btree_update_node_key(root, root->cursor, level + 1,
-                                               node->keys[node->num_keys]);
-       }
-       /*
-        * if node underflows, either merge with sibling or rebalance
-        * with sibling.
-        */
-       if (node->num_keys < BTREE_KEY_MIN)
-               btree_delete_node(root, level);
-}
-
-void *
-btree_delete(
-       struct btree_root       *root,
-       unsigned long           key)
-{
-       void                    *value;
-
-       value = btree_lookup(root, key);
-       if (!value)
-               return NULL;
-
-#ifdef BTREE_STATS
-       root->stats.delete += 1;
-       root->stats.num_items -= 1;
-#endif
-
-       btree_delete_key(root, 0);
-
-       btree_invalidate_cursor(root);
-
-       return value;
-}
-
-#ifdef BTREE_STATS
-void
-btree_print_stats(
-       struct btree_root       *root,
-       FILE                    *f)
-{
-       unsigned long           max_items = root->stats.max_items *
-                                               (root->root_node->num_keys + 1);
-
-       fprintf(f, "\tnum_items = %lu, max_items = %lu (%lu%%)\n",
-                       root->stats.num_items, max_items,
-                       root->stats.num_items * 100 / max_items);
-       fprintf(f, "\talloced = %d nodes, %lu bytes, %lu bytes per item\n",
-                       root->stats.alloced,
-                       root->stats.alloced * sizeof(struct btree_node),
-                       root->stats.alloced * sizeof(struct btree_node) /
-                                                       root->stats.num_items);
-       fprintf(f, "\tlookup = %d\n", root->stats.lookup);
-       fprintf(f, "\tfind = %d\n", root->stats.find);
-       fprintf(f, "\tcache_hits = %d\n", root->stats.cache_hits);
-       fprintf(f, "\tcache_misses = %d\n", root->stats.cache_misses);
-       fprintf(f, "\tkey_update = %d\n", root->stats.key_update);
-       fprintf(f, "\tvalue_update = %d\n", root->stats.value_update);
-       fprintf(f, "\tinsert = %d\n", root->stats.insert);
-       fprintf(f, "\tshift_prev = %d\n", root->stats.shift_prev);
-       fprintf(f, "\tshift_next = %d\n", root->stats.shift_next);
-       fprintf(f, "\tsplit = %d\n", root->stats.split);
-       fprintf(f, "\tinc_height = %d\n", root->stats.inc_height);
-       fprintf(f, "\tdelete = %d\n", root->stats.delete);
-       fprintf(f, "\tmerge_prev = %d\n", root->stats.merge_prev);
-       fprintf(f, "\tmerge_next = %d\n", root->stats.merge_next);
-       fprintf(f, "\tbalance_prev = %d\n", root->stats.balance_prev);
-       fprintf(f, "\tbalance_next = %d\n", root->stats.balance_next);
-       fprintf(f, "\tdec_height = %d\n", root->stats.dec_height);
-}
-#endif
diff --git a/repair/btree.h b/repair/btree.h
deleted file mode 100644 (file)
index aff9504..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2007 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#ifndef _BTREE_H
-#define _BTREE_H
-
-
-struct btree_root;
-
-void
-btree_init(
-       struct btree_root       **root);
-
-void
-btree_destroy(
-       struct btree_root       *root);
-
-int
-btree_is_empty(
-       struct btree_root       *root);
-
-void *
-btree_lookup(
-       struct btree_root       *root,
-       unsigned long           key);
-
-void *
-btree_find(
-       struct btree_root       *root,
-       unsigned long           key,
-       unsigned long           *actual_key);
-
-void *
-btree_peek_prev(
-       struct btree_root       *root,
-       unsigned long           *key);
-
-void *
-btree_peek_next(
-       struct btree_root       *root,
-       unsigned long           *key);
-
-void *
-btree_lookup_next(
-       struct btree_root       *root,
-       unsigned long           *key);
-
-void *
-btree_lookup_prev(
-       struct btree_root       *root,
-       unsigned long           *key);
-
-int
-btree_insert(
-       struct btree_root       *root,
-       unsigned long           key,
-       void                    *value);
-
-void *
-btree_delete(
-       struct btree_root       *root,
-       unsigned long           key);
-
-int
-btree_update_key(
-       struct btree_root       *root,
-       unsigned long           old_key,
-       unsigned long           new_key);
-
-int
-btree_update_value(
-       struct btree_root       *root,
-       unsigned long           key,
-       void                    *new_value);
-
-void
-btree_clear(
-       struct btree_root       *root);
-
-#ifdef BTREE_STATS
-void
-btree_print_stats(
-       struct btree_root       *root,
-       FILE                    *f);
-#endif
-
-#endif /* _BTREE_H */
index 734e9a8399c3ac279410c3e06683446e8cdd27f4..4ccf804ed11c7febdbbae81396db1068a8306280 100644 (file)
@@ -118,7 +118,6 @@ verify_inode_chunk(xfs_mount_t              *mp,
        int             i;
        int             j;
        int             state;
-       xfs_extlen_t    blen;
 
        agno = XFS_INO_TO_AGNO(mp, ino);
        agino = XFS_INO_TO_AGINO(mp, ino);
@@ -152,8 +151,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
 
                pthread_mutex_lock(&ag_locks[agno]);
 
-               state = get_bmap(agno, agbno);
-               switch (state) {
+               switch (state = get_agbno_state(mp, agno, agbno))  {
                case XR_E_INO:
                        do_warn(
                _("uncertain inode block %d/%d already known\n"),
@@ -162,7 +160,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
-                       set_bmap(agno, agbno, XR_E_INO);
+                       set_agbno_state(mp, agno, agbno, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
@@ -174,14 +172,14 @@ verify_inode_chunk(xfs_mount_t            *mp,
                        do_warn(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, agbno, state);
-                       set_bmap(agno, agbno, XR_E_MULT);
+                       set_agbno_state(mp, agno, agbno, XR_E_MULT);
                        pthread_mutex_unlock(&ag_locks[agno]);
                        return(0);
                default:
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, agbno, state);
-                       set_bmap(agno, agbno, XR_E_INO);
+                       set_agbno_state(mp, agno, agbno, XR_E_INO);
                        break;
                }
 
@@ -434,11 +432,9 @@ verify_inode_chunk(xfs_mount_t             *mp,
         * entry or an iunlinked pointer
         */
        pthread_mutex_lock(&ag_locks[agno]);
-       for (cur_agbno = chunk_start_agbno;
-            cur_agbno < chunk_stop_agbno;
-            cur_agbno += blen)  {
-               state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
-               switch (state) {
+       for (j = 0, cur_agbno = chunk_start_agbno;
+                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
                case XR_E_MULT:
                case XR_E_INUSE:
                case XR_E_INUSE_FS:
@@ -446,9 +442,9 @@ verify_inode_chunk(xfs_mount_t              *mp,
                        do_warn(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, cur_agbno, state);
-                       set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
-                       pthread_mutex_unlock(&ag_locks[agno]);
-                       return 0;
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
+                       j = 1;
+                       break;
                case XR_E_INO:
                        do_error(
                _("uncertain inode block overlap, agbno = %d, ino = %llu\n"),
@@ -457,6 +453,11 @@ verify_inode_chunk(xfs_mount_t             *mp,
                default:
                        break;
                }
+
+               if (j) {
+                       pthread_mutex_unlock(&ag_locks[agno]);
+                       return(0);
+               }
        }
        pthread_mutex_unlock(&ag_locks[agno]);
 
@@ -484,10 +485,8 @@ verify_inode_chunk(xfs_mount_t             *mp,
        pthread_mutex_lock(&ag_locks[agno]);
 
        for (cur_agbno = chunk_start_agbno;
-            cur_agbno < chunk_stop_agbno;
-            cur_agbno += blen)  {
-               state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
-               switch (state) {
+                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
+               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
                case XR_E_INO:
                        do_error(
                _("uncertain inode block %llu already known\n"),
@@ -496,7 +495,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
-                       set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
@@ -510,7 +509,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, cur_agbno, state);
-                       set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
+                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
                        break;
                }
        }
@@ -630,9 +629,10 @@ process_inode_chunk(
                        cluster_count * sizeof(xfs_buf_t*));
 
        for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+#ifdef XR_PF_TRACE
                pftrace("about to read off %llu in AG %d",
                        (long long)XFS_AGB_TO_DADDR(mp, agno, agbno), agno);
-
+#endif
                bplist[bp_index] = libxfs_readbuf(mp->m_dev,
                                        XFS_AGB_TO_DADDR(mp, agno, agbno),
                                        XFS_FSB_TO_BB(mp, blks_per_cluster), 0);
@@ -650,9 +650,11 @@ process_inode_chunk(
                }
                agbno += blks_per_cluster;
 
+#ifdef XR_PF_TRACE
                pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
                        (long long)XFS_BUF_ADDR(bplist[bp_index]),
                        XFS_BUF_COUNT(bplist[bp_index]), agno);
+#endif
        }
        agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
 
@@ -743,23 +745,22 @@ process_inode_chunk(
         * mark block as an inode block in the incore bitmap
         */
        pthread_mutex_lock(&ag_locks[agno]);
-       state = get_bmap(agno, agbno);
-       switch (state) {
-       case XR_E_INO:  /* already marked */
-               break;
-       case XR_E_UNKNOWN:
-       case XR_E_FREE:
-       case XR_E_FREE1:
-               set_bmap(agno, agbno, XR_E_INO);
-               break;
-       case XR_E_BAD_STATE:
-               do_error(_("bad state in block map %d\n"), state);
-               break;
-       default:
-               set_bmap(agno, agbno, XR_E_MULT);
-               do_warn(_("inode block %llu multiply claimed, state was %d\n"),
-                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
-               break;
+       switch (state = get_agbno_state(mp, agno, agbno))  {
+               case XR_E_INO:  /* already marked */
+                       break;
+               case XR_E_UNKNOWN:
+               case XR_E_FREE:
+               case XR_E_FREE1:
+                       set_agbno_state(mp, agno, agbno, XR_E_INO);
+                       break;
+               case XR_E_BAD_STATE:
+                       do_error(_("bad state in block map %d\n"), state);
+                       break;
+               default:
+                       set_agbno_state(mp, agno, agbno, XR_E_MULT);
+                       do_warn(_("inode block %llu multiply claimed, state was %d\n"),
+                               XFS_AGB_TO_FSB(mp, agno, agbno), state);
+                       break;
        }
        pthread_mutex_unlock(&ag_locks[agno]);
 
@@ -905,10 +906,10 @@ process_inode_chunk(
                         * done! - finished up irec and block simultaneously
                         */
                        for (bp_index = 0; bp_index < cluster_count; bp_index++) {
-                               pftrace("put/writebuf %p (%llu) in AG %d",
-                                       bplist[bp_index], (long long)
-                                       XFS_BUF_ADDR(bplist[bp_index]), agno);
-
+#ifdef XR_PF_TRACE
+                               pftrace("put/writebuf %p (%llu) in AG %d", bplist[bp_index],
+                                       (long long)XFS_BUF_ADDR(bplist[bp_index]), agno);
+#endif
                                if (dirty && !no_modify)
                                        libxfs_writebuf(bplist[bp_index], 0);
                                else
@@ -925,21 +926,20 @@ process_inode_chunk(
                        agbno++;
 
                        pthread_mutex_lock(&ag_locks[agno]);
-                       state = get_bmap(agno, agbno);
-                       switch (state) {
+                       switch (state = get_agbno_state(mp, agno, agbno))  {
                        case XR_E_INO:  /* already marked */
                                break;
                        case XR_E_UNKNOWN:
                        case XR_E_FREE:
                        case XR_E_FREE1:
-                               set_bmap(agno, agbno, XR_E_INO);
+                               set_agbno_state(mp, agno, agbno, XR_E_INO);
                                break;
                        case XR_E_BAD_STATE:
                                do_error(_("bad state in block map %d\n"),
                                        state);
                                break;
                        default:
-                               set_bmap(agno, agbno, XR_E_MULT);
+                               set_agbno_state(mp, agno, agbno, XR_E_MULT);
                                do_warn(_("inode block %llu multiply claimed, "
                                          "state was %d\n"),
                                        XFS_AGB_TO_FSB(mp, agno, agbno), state);
index bf04c6ee544c215c25b89b562871fae7f84917e0..9da721be464f84a78e3632e577649ae0303d6aa0 100644 (file)
@@ -524,7 +524,6 @@ process_rt_rec(
 
        /*
         * set the appropriate number of extents
-        * this iterates block by block, this can be optimised using extents
         */
        for (b = irec->br_startblock; b < irec->br_startblock +
                        irec->br_blockcount; b += mp->m_sb.sb_rextsize)  {
@@ -546,33 +545,40 @@ process_rt_rec(
                        continue;
                }
 
-               state = get_rtbmap(ext);
+               state = get_rtbno_state(mp, ext);
+
                switch (state)  {
-               case XR_E_FREE:
-               case XR_E_UNKNOWN:
-                       set_rtbmap(ext, XR_E_INUSE);
-                       break;
-               case XR_E_BAD_STATE:
-                       do_error(_("bad state in rt block map %llu\n"), ext);
-               case XR_E_FS_MAP:
-               case XR_E_INO:
-               case XR_E_INUSE_FS:
-                       do_error(_("data fork in rt inode %llu found "
-                               "metadata block %llu in rt bmap\n"),
-                               ino, ext);
-               case XR_E_INUSE:
-                       if (pwe)
+                       case XR_E_FREE:
+                       case XR_E_UNKNOWN:
+                               set_rtbno_state(mp, ext, XR_E_INUSE);
                                break;
-               case XR_E_MULT:
-                       set_rtbmap(ext, XR_E_MULT);
-                       do_warn(_("data fork in rt inode %llu claims "
-                                       "used rt block %llu\n"),
+
+                       case XR_E_BAD_STATE:
+                               do_error(_("bad state in rt block map %llu\n"),
+                                               ext);
+
+                       case XR_E_FS_MAP:
+                       case XR_E_INO:
+                       case XR_E_INUSE_FS:
+                               do_error(_("data fork in rt inode %llu found "
+                                       "metadata block %llu in rt bmap\n"),
                                        ino, ext);
-                       return 1;
-               case XR_E_FREE1:
-               default:
-                       do_error(_("illegal state %d in rt block map "
-                                       "%llu\n"), state, b);
+
+                       case XR_E_INUSE:
+                               if (pwe)
+                                       break;
+
+                       case XR_E_MULT:
+                               set_rtbno_state(mp, ext, XR_E_MULT);
+                               do_warn(_("data fork in rt inode %llu claims "
+                                               "used rt block %llu\n"),
+                                               ino, ext);
+                               return 1;
+
+                       case XR_E_FREE1:
+                       default:
+                               do_error(_("illegal state %d in rt block map "
+                                               "%llu\n"), state, b);
                }
        }
 
@@ -615,10 +621,9 @@ process_bmbt_reclist_int(
        char                    *forkname;
        int                     i;
        int                     state;
+       xfs_dfsbno_t            e;
        xfs_agnumber_t          agno;
        xfs_agblock_t           agbno;
-       xfs_agblock_t           ebno;
-       xfs_extlen_t            blen;
        xfs_agnumber_t          locked_agno = -1;
        int                     error = 1;
 
@@ -720,7 +725,7 @@ process_bmbt_reclist_int(
                 */
                agno = XFS_FSB_TO_AGNO(mp, irec.br_startblock);
                agbno = XFS_FSB_TO_AGBNO(mp, irec.br_startblock);
-               ebno = agbno + irec.br_blockcount;
+               e = irec.br_startblock + irec.br_blockcount;
                if (agno != locked_agno) {
                        if (locked_agno != -1)
                                pthread_mutex_unlock(&ag_locks[locked_agno]);
@@ -735,23 +740,38 @@ process_bmbt_reclist_int(
                         * checking each entry without setting the
                         * block bitmap
                         */
-                       if (search_dup_extent(agno, agbno, ebno)) {
-                               do_warn(_("%s fork in ino %llu claims "
-                                       "dup extent, off - %llu, "
-                                       "start - %llu, cnt %llu\n"),
-                                       forkname, ino, irec.br_startoff,
-                                       irec.br_startblock,
-                                       irec.br_blockcount);
-                               goto done;
+                       for (b = irec.br_startblock; b < e; b++, agbno++)  {
+                               if (search_dup_extent(mp, agno, agbno)) {
+                                       do_warn(_("%s fork in ino %llu claims "
+                                               "dup extent, off - %llu, "
+                                               "start - %llu, cnt %llu\n"),
+                                               forkname, ino, irec.br_startoff,
+                                               irec.br_startblock,
+                                               irec.br_blockcount);
+                                       goto done;
+                               }
                        }
                        *tot += irec.br_blockcount;
                        continue;
                }
 
-               for (b = irec.br_startblock;
-                    agbno < ebno;
-                    b += blen, agbno += blen) {
-                       state = get_bmap_ext(agno, agbno, ebno, &blen);
+               for (b = irec.br_startblock; b < e; b++, agbno++)  {
+                       /*
+                        * Process in chunks of 16 (XR_BB_UNIT/XR_BB)
+                        * for common XR_E_UNKNOWN to XR_E_INUSE transition
+                        */
+                       if (((agbno & XR_BB_MASK) == 0) && ((irec.br_startblock + irec.br_blockcount - b) >= (XR_BB_UNIT/XR_BB))) {
+                               if (ba_bmap[agno][agbno>>XR_BB] == XR_E_UNKNOWN_LL) {
+                                       ba_bmap[agno][agbno>>XR_BB] = XR_E_INUSE_LL;
+                                       agbno += (XR_BB_UNIT/XR_BB) - 1;
+                                       b += (XR_BB_UNIT/XR_BB) - 1;
+                                       continue;
+                               }
+
+                       }
+
+                       state = get_agbno_state(mp, agno, agbno);
+
                        switch (state)  {
                        case XR_E_FREE:
                        case XR_E_FREE1:
@@ -760,7 +780,7 @@ process_bmbt_reclist_int(
                                        forkname, ino, (__uint64_t) b);
                                /* fall through ... */
                        case XR_E_UNKNOWN:
-                               set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
+                               set_agbno_state(mp, agno, agbno, XR_E_INUSE);
                                break;
 
                        case XR_E_BAD_STATE:
@@ -776,7 +796,7 @@ process_bmbt_reclist_int(
 
                        case XR_E_INUSE:
                        case XR_E_MULT:
-                               set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+                               set_agbno_state(mp, agno, agbno, XR_E_MULT);
                                do_warn(_("%s fork in %s inode %llu claims "
                                        "used block %llu\n"),
                                        forkname, ftype, ino, (__uint64_t) b);
@@ -2030,7 +2050,7 @@ process_inode_data_fork(
                *nextents = 1;
 
        if (dinoc->di_format != XFS_DINODE_FMT_LOCAL && type != XR_INO_RTDATA)
-               *dblkmap = blkmap_alloc(*nextents, XFS_DATA_FORK);
+               *dblkmap = blkmap_alloc(*nextents);
        *nextents = 0;
 
        switch (dinoc->di_format) {
@@ -2152,14 +2172,14 @@ process_inode_attr_fork(
                err = process_lclinode(mp, agno, ino, dino, XFS_ATTR_FORK);
                break;
        case XFS_DINODE_FMT_EXTENTS:
-               ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
+               ablkmap = blkmap_alloc(*anextents);
                *anextents = 0;
                err = process_exinode(mp, agno, ino, dino, type, dirty,
                                atotblocks, anextents, &ablkmap,
                                XFS_ATTR_FORK, check_dups);
                break;
        case XFS_DINODE_FMT_BTREE:
-               ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
+               ablkmap = blkmap_alloc(*anextents);
                *anextents = 0;
                err = process_btinode(mp, agno, ino, dino, type, dirty,
                                atotblocks, anextents, &ablkmap,
index d0739fd40987765a107d841994b8d4aaafa94cf5..2723e3b9291b41666cbb6bc54120947123f54232 100644 (file)
@@ -103,19 +103,21 @@ da_read_buf(
                bplist = bparray;
        }
        for (i = 0; i < nex; i++) {
+#ifdef XR_PF_TRACE
                pftrace("about to read off %llu (len = %d)",
                        (long long)XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
                        XFS_FSB_TO_BB(mp, bmp[i].blockcount));
-
+#endif
                bplist[i] = libxfs_readbuf(mp->m_dev,
                                XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
                                XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
                if (!bplist[i])
                        goto failed;
-
+#ifdef XR_PF_TRACE
                pftrace("readbuf %p (%llu, %d)", bplist[i],
                        (long long)XFS_BUF_ADDR(bplist[i]),
                        XFS_BUF_COUNT(bplist[i]));
+#endif
        }
        dabuf = malloc(XFS_DA_BUF_SIZE(nex));
        if (dabuf == NULL) {
@@ -246,8 +248,10 @@ da_brelse(
        }
        da_buf_done(dabuf);
        for (i = 0; i < nbuf; i++) {
+#ifdef XR_PF_TRACE
                pftrace("putbuf %p (%llu)", bplist[i],
                                        (long long)XFS_BUF_ADDR(bplist[i]));
+#endif
                libxfs_putbuf(bplist[i]);
        }
        if (bplist != &bp)
@@ -534,7 +538,7 @@ verify_final_dir2_path(xfs_mount_t  *mp,
        /*
         * bail out if this is the root block (top of tree)
         */
-       if (this_level >= cursor->active)
+       if (this_level >= cursor->active)  
                return(0);
        /*
         * set hashvalue to correctl reflect the now-validated
@@ -1421,7 +1425,7 @@ process_dir2_data(
                 * numbers.  Do NOT touch the name until after we've computed
                 * the hashvalue and done a namecheck() on the name.
                 *
-                * Conditions must either set clearino to zero or set
+                * Conditions must either set clearino to zero or set 
                 * clearreason why it's being cleared.
                 */
                if (!ino_discovery && ent_ino == BADFSINO) {
@@ -1452,7 +1456,7 @@ process_dir2_data(
                                if (ino_discovery) {
                                        add_inode_uncertain(mp, ent_ino, 0);
                                        clearino = 0;
-                               } else
+                               } else 
                                        clearreason = _("non-existent");
                        } else {
                                /*
index 5fb8149b4422af26d88a85402181f59b7271d670..9a78caee5d84a8d45bffb4d9bb60ff34f3bef92a 100644 (file)
@@ -156,6 +156,11 @@ EXTERN int         chunks_pblock;  /* # of 64-ino chunks per allocation */
 EXTERN int             max_symlink_blocks;
 EXTERN __int64_t       fs_max_file_offset;
 
+/* block allocation bitmaps */
+
+EXTERN __uint64_t      **ba_bmap;      /* see incore.h */
+EXTERN __uint64_t      *rt_ba_bmap;    /* see incore.h */
+
 /* realtime info */
 
 EXTERN xfs_rtword_t    *btmcompute;
@@ -194,6 +199,10 @@ EXTERN pthread_mutex_t     *ag_locks;
 EXTERN int             report_interval;
 EXTERN __uint64_t      *prog_rpt_done;
 
+#ifdef XR_PF_TRACE
+EXTERN FILE            *pf_trace_file;
+#endif
+
 EXTERN int             ag_stride;
 EXTERN int             thread_count;
 
index 682a3db8669090820e983794dea786e7de97b315..27604e27c9c9b9b49bc039b03dfba376499c4aa3 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <libxfs.h>
 #include "avl.h"
-#include "btree.h"
 #include "globals.h"
 #include "incore.h"
 #include "agheader.h"
 #include "err_protos.h"
 #include "threads.h"
 
-/*
- * The following manages the in-core bitmap of the entire filesystem
- * using extents in a btree.
- *
- * The btree items will point to one of the state values below,
- * rather than storing the value itself in the pointer.
- */
-static int states[16] =
-       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+/* ba bmap setupstuff.  setting/getting state is in incore.h  */
 
-static struct btree_root       **ag_bmap;
-
-static void
-update_bmap(
-       struct btree_root       *bmap,
-       unsigned long           offset,
-       xfs_extlen_t            blen,
-       void                    *new_state)
+void
+setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
 {
-       unsigned long           end = offset + blen;
-       int                     *cur_state;
-       unsigned long           cur_key;
-       int                     *next_state;
-       unsigned long           next_key;
-       int                     *prev_state;
-
-       cur_state = btree_find(bmap, offset, &cur_key);
-       if (!cur_state)
-               return;
+       int i;
+       size_t size = 0;
 
-       if (offset == cur_key) {
-               /* if the start is the same as the "item" extent */
-               if (cur_state == new_state)
-                       return;
-
-               /*
-                * Note: this may be NULL if we are updating the map for
-                * the superblock.
-                */
-               prev_state = btree_peek_prev(bmap, NULL);
-
-               next_state = btree_peek_next(bmap, &next_key);
-               if (next_key > end) {
-                       /* different end */
-                       if (new_state == prev_state) {
-                               /* #1: prev has same state, move offset up */
-                               btree_update_key(bmap, offset, end);
-                               return;
-                       }
-
-                       /* #4: insert new extent after, update current value */
-                       btree_update_value(bmap, offset, new_state);
-                       btree_insert(bmap, end, cur_state);
-                       return;
-               }
+       ba_bmap = (__uint64_t**)malloc(agno*sizeof(__uint64_t *));
+       if (!ba_bmap)
+               do_error(_("couldn't allocate block map pointers\n"));
+       ag_locks = malloc(agno * sizeof(pthread_mutex_t));
+       if (!ag_locks)
+               do_error(_("couldn't allocate block map locks\n"));
 
-               /* same end (and same start) */
-               if (new_state == next_state) {
-                       /* next has same state */
-                       if (new_state == prev_state) {
-                               /* #3: merge prev & next */
-                               btree_delete(bmap, offset);
-                               btree_delete(bmap, end);
-                               return;
-                       }
-
-                       /* #8: merge next */
-                       btree_update_value(bmap, offset, new_state);
-                       btree_delete(bmap, end);
-                       return;
-               }
+       for (i = 0; i < agno; i++)  {
+               size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB),
+                               sizeof(__uint64_t));
 
-               /* same start, same end, next has different state */
-               if (new_state == prev_state) {
-                       /* #5: prev has same state */
-                       btree_delete(bmap, offset);
+               ba_bmap[i] = (__uint64_t*)memalign(sizeof(__uint64_t), size);
+               if (!ba_bmap[i]) {
+                       do_error(_("couldn't allocate block map, size = %d\n"),
+                               numblocks);
                        return;
                }
+               memset(ba_bmap[i], 0, size);
+               pthread_mutex_init(&ag_locks[i], NULL);
+       }
 
-               /* #6: update value only */
-               btree_update_value(bmap, offset, new_state);
+       if (rtblocks == 0)  {
+               rt_ba_bmap = NULL;
                return;
        }
 
-       /* different start, offset is in the middle of "cur" */
-       prev_state = btree_peek_prev(bmap, NULL);
-       ASSERT(prev_state != NULL);
-       if (prev_state == new_state)
-               return;
+       size = roundup(rtblocks / (NBBY/XR_BB), sizeof(__uint64_t));
 
-       if (end == cur_key) {
-               /* end is at the same point as the current extent */
-               if (new_state == cur_state) {
-                       /* #7: move next extent down */
-                       btree_update_key(bmap, end, offset);
+       rt_ba_bmap=(__uint64_t*)memalign(sizeof(__uint64_t), size);
+       if (!rt_ba_bmap) {
+                       do_error(
+               _("couldn't allocate realtime block map, size = %llu\n"),
+                               rtblocks);
                        return;
-               }
-
-               /* #9: different start, same end, add new extent */
-               btree_insert(bmap, offset, new_state);
-               return;
        }
 
-       /* #2: insert an extent into the middle of another extent */
-       btree_insert(bmap, offset, new_state);
-       btree_insert(bmap, end, prev_state);
+       /*
+        * start all real-time as free blocks
+        */
+       set_bmap_rt(rtblocks);
+
+       return;
 }
 
+/* ARGSUSED */
 void
-set_bmap_ext(
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           agbno,
-       xfs_extlen_t            blen,
-       int                     state)
+teardown_rt_bmap(xfs_mount_t *mp)
 {
-       update_bmap(ag_bmap[agno], agbno, blen, &states[state]);
+       if (rt_ba_bmap != NULL)  {
+               free(rt_ba_bmap);
+               rt_ba_bmap = NULL;
+       }
+
+       return;
 }
 
-int
-get_bmap_ext(
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           agbno,
-       xfs_agblock_t           maxbno,
-       xfs_extlen_t            *blen)
+/* ARGSUSED */
+void
+teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno)
 {
-       int                     *statep;
-       unsigned long           key;
-
-       statep = btree_find(ag_bmap[agno], agbno, &key);
-       if (!statep)
-               return -1;
-
-       if (key == agbno) {
-               if (blen) {
-                       if (!btree_peek_next(ag_bmap[agno], &key))
-                               return -1;
-                       *blen = MIN(maxbno, key) - agbno;
-               }
-               return *statep;
-       }
+       ASSERT(ba_bmap[agno] != NULL);
 
-       statep = btree_peek_prev(ag_bmap[agno], NULL);
-       if (!statep)
-               return -1;
-       if (blen)
-               *blen = MIN(maxbno, key) - agbno;
+       free(ba_bmap[agno]);
+       ba_bmap[agno] = NULL;
 
-       return *statep;
+       return;
 }
 
-static uint64_t                *rt_bmap;
-static size_t          rt_bmap_size;
-
-/* block records fit into __uint64_t's units */
-#define XR_BB_UNIT     64                      /* number of bits/unit */
-#define XR_BB          4                       /* bits per block record */
-#define XR_BB_NUM      (XR_BB_UNIT/XR_BB)      /* number of records per unit */
-#define XR_BB_MASK     0xF                     /* block record mask */
-
-/*
- * these work in real-time extents (e.g. fsbno == rt extent number)
- */
-int
-get_rtbmap(
-       xfs_drtbno_t    bno)
+/* ARGSUSED */
+void
+teardown_bmap_finish(xfs_mount_t *mp)
 {
-       return (*(rt_bmap + bno /  XR_BB_NUM) >>
-               ((bno % XR_BB_NUM) * XR_BB)) & XR_BB_MASK;
+       free(ba_bmap);
+       ba_bmap = NULL;
+
+       return;
 }
 
 void
-set_rtbmap(
-       xfs_drtbno_t    bno,
-       int             state)
+teardown_bmap(xfs_mount_t *mp)
 {
-       *(rt_bmap + bno / XR_BB_NUM) =
-        ((*(rt_bmap + bno / XR_BB_NUM) &
-         (~((__uint64_t) XR_BB_MASK << ((bno % XR_BB_NUM) * XR_BB)))) |
-        (((__uint64_t) state) << ((bno % XR_BB_NUM) * XR_BB)));
+       xfs_agnumber_t i;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               teardown_ag_bmap(mp, i);
+       }
+
+       teardown_rt_bmap(mp);
+       teardown_bmap_finish(mp);
+
+       return;
 }
 
-static void
-reset_rt_bmap(void)
+/*
+ * block map initialization routines -- realtime, log, fs
+ */
+void
+set_bmap_rt(xfs_drtbno_t num)
 {
-       if (rt_bmap)
-               memset(rt_bmap, 0x22, rt_bmap_size);    /* XR_E_FREE */
+       xfs_drtbno_t j;
+       xfs_drtbno_t size;
+
+       /*
+        * for now, initialize all realtime blocks to be free
+        * (state == XR_E_FREE)
+        */
+       size = howmany(num / (NBBY/XR_BB), sizeof(__uint64_t));
+
+       for (j = 0; j < size; j++)
+               rt_ba_bmap[j] = 0x2222222222222222LL;
+
+       return;
 }
 
-static void
-init_rt_bmap(
-       xfs_mount_t     *mp)
+void
+set_bmap_log(xfs_mount_t *mp)
 {
-       if (mp->m_sb.sb_rextents == 0)
+       xfs_dfsbno_t    logend, i;
+
+       if (mp->m_sb.sb_logstart == 0)
                return;
 
-       rt_bmap_size = roundup(mp->m_sb.sb_rextents / (NBBY / XR_BB),
-                              sizeof(__uint64_t));
+       logend = mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks;
 
-       rt_bmap = memalign(sizeof(__uint64_t), rt_bmap_size);
-       if (!rt_bmap) {
-               do_error(
-               _("couldn't allocate realtime block map, size = %llu\n"),
-                       mp->m_sb.sb_rextents);
-               return;
+       for (i = mp->m_sb.sb_logstart; i < logend ; i++)  {
+               set_fsbno_state(mp, i, XR_E_INUSE_FS);
        }
+
+       return;
 }
 
-static void
-free_rt_bmap(xfs_mount_t *mp)
+void
+set_bmap_fs(xfs_mount_t *mp)
 {
-       free(rt_bmap);
-       rt_bmap = NULL;
-}
+       xfs_agnumber_t  i;
+       xfs_agblock_t   j;
+       xfs_agblock_t   end;
+
+       /*
+        * AG header is 4 sectors
+        */
+       end = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)
+               for (j = 0; j < end; j++)
+                       set_agbno_state(mp, i, j, XR_E_INUSE_FS);
 
+       return;
+}
 
+#if 0
 void
-reset_bmaps(xfs_mount_t *mp)
+set_bmap_fs_bt(xfs_mount_t *mp)
 {
-       xfs_agnumber_t  agno;
-       xfs_agblock_t   ag_size;
-       int             ag_hdr_block;
-
-       ag_hdr_block = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
-       ag_size = mp->m_sb.sb_agblocks;
-
-       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-               if (agno == mp->m_sb.sb_agcount - 1)
-                       ag_size = (xfs_extlen_t)(mp->m_sb.sb_dblocks -
-                                  (xfs_drfsbno_t)mp->m_sb.sb_agblocks * agno);
-#ifdef BTREE_STATS
-               if (btree_find(ag_bmap[agno], 0, NULL)) {
-                       printf("ag_bmap[%d] btree stats:\n", i);
-                       btree_print_stats(ag_bmap[agno], stdout);
-               }
-#endif
+       xfs_agnumber_t  i;
+       xfs_agblock_t   j;
+       xfs_agblock_t   begin;
+       xfs_agblock_t   end;
+
+       begin = bnobt_root;
+       end = inobt_root + 1;
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
                /*
-                * We always insert an item for the first block having a
-                * given state.  So the code below means:
-                *
-                *      block 0..ag_hdr_block-1:        XR_E_INUSE_FS
-                *      ag_hdr_block..ag_size:          XR_E_UNKNOWN
-                *      ag_size...                      XR_E_BAD_STATE
+                * account for btree roots
                 */
-               btree_clear(ag_bmap[agno]);
-               btree_insert(ag_bmap[agno], 0, &states[XR_E_INUSE_FS]);
-               btree_insert(ag_bmap[agno],
-                               ag_hdr_block, &states[XR_E_UNKNOWN]);
-               btree_insert(ag_bmap[agno], ag_size, &states[XR_E_BAD_STATE]);
+               for (j = begin; j < end; j++)
+                       set_agbno_state(mp, i, j, XR_E_INUSE_FS);
        }
 
-       if (mp->m_sb.sb_logstart != 0) {
-               set_bmap_ext(XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart),
-                            XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
-                            mp->m_sb.sb_logblocks, XR_E_INUSE_FS);
-       }
-
-       reset_rt_bmap();
+       return;
 }
+#endif
 
 void
-init_bmaps(xfs_mount_t *mp)
+incore_init(xfs_mount_t *mp)
 {
-       xfs_agnumber_t i;
+       int agcount = mp->m_sb.sb_agcount;
+       extern void incore_ino_init(xfs_mount_t *);
+       extern void incore_ext_init(xfs_mount_t *);
 
-       ag_bmap = calloc(mp->m_sb.sb_agcount, sizeof(struct btree_root *));
-       if (!ag_bmap)
-               do_error(_("couldn't allocate block map btree roots\n"));
+       /* init block alloc bmap */
 
-       ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t));
-       if (!ag_locks)
-               do_error(_("couldn't allocate block map locks\n"));
+       setup_bmap(agcount, mp->m_sb.sb_agblocks, mp->m_sb.sb_rextents);
+       incore_ino_init(mp);
+       incore_ext_init(mp);
 
-       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
-               btree_init(&ag_bmap[i]);
-               pthread_mutex_init(&ag_locks[i], NULL);
-       }
+       /* initialize random globals now that we know the fs geometry */
 
-       init_rt_bmap(mp);
-       reset_bmaps(mp);
+       inodes_per_block = mp->m_sb.sb_inopblock;
+
+       return;
 }
 
-void
-free_bmaps(xfs_mount_t *mp)
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+int
+get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+               xfs_agblock_t ag_blockno)
 {
-       xfs_agnumber_t i;
+       __uint64_t *addr;
 
-       for (i = 0; i < mp->m_sb.sb_agcount; i++)
-               btree_destroy(ag_bmap[i]);
-       free(ag_bmap);
-       ag_bmap = NULL;
+       addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM;
 
-       free_rt_bmap(mp);
+       return((*addr >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK);
 }
+
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno, int state)
+{
+       __uint64_t *addr;
+
+       addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM;
+
+       *addr = (((*addr) &
+         (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) |
+        (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)));
+}
+
+int
+get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno)
+{
+       return(get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno),
+                       XFS_FSB_TO_AGBNO(mp, blockno)));
+}
+
+void
+set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state)
+{
+       set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno),
+               XFS_FSB_TO_AGBNO(mp, blockno), state);
+
+       return;
+}
+#endif
index 99853fb0b9a11ff90f1fdbcb036bf98fb0c5e7e6..a22ef0fb06b20008623503015f67e4c267a989e0 100644 (file)
 #define XFS_REPAIR_INCORE_H
 
 #include "avl.h"
-
-
 /*
  * contains definition information.  implementation (code)
  * is spread out in separate files.
  */
 
 /*
- * block map -- track state of each filesystem block.
+ * block bit map defs -- track state of each filesystem block.
+ * ba_bmap is an array of bitstrings declared in the globals.h file.
+ * the bitstrings are broken up into 64-bit chunks.  one bitstring per AG.
  */
+#define BA_BMAP_SIZE(x)                (howmany(x, 4))
 
-void           init_bmaps(xfs_mount_t *mp);
-void           reset_bmaps(xfs_mount_t *mp);
-void           free_bmaps(xfs_mount_t *mp);
+void                   set_bmap_rt(xfs_drfsbno_t numblocks);
+void                   set_bmap_log(xfs_mount_t *mp);
+void                   set_bmap_fs(xfs_mount_t *mp);
+void                   teardown_bmap(xfs_mount_t *mp);
 
-void           set_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
-                            xfs_extlen_t blen, int state);
-int            get_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
-                            xfs_agblock_t maxbno, xfs_extlen_t *blen);
+void                   teardown_rt_bmap(xfs_mount_t *mp);
+void                   teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno);
+void                   teardown_bmap_finish(xfs_mount_t *mp);
 
-void           set_rtbmap(xfs_drtbno_t bno, int state);
-int            get_rtbmap(xfs_drtbno_t bno);
+/* blocks are numbered from zero */
 
-static inline void
-set_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno, int state)
-{
-       set_bmap_ext(agno, agbno, 1, state);
-}
+/* block records fit into __uint64_t's units */
+
+#define XR_BB_UNIT     64                      /* number of bits/unit */
+#define XR_BB          4                       /* bits per block record */
+#define XR_BB_NUM      (XR_BB_UNIT/XR_BB)      /* number of records per unit */
+#define XR_BB_MASK     0xF                     /* block record mask */
+
+/*
+ * bitstring ops -- set/get block states, either in filesystem
+ * bno's or in agbno's.  turns out that fsbno addressing is
+ * more convenient when dealing with bmap extracted addresses
+ * and agbno addressing is more convenient when dealing with
+ * meta-data extracted addresses.  So the fsbno versions use
+ * mtype (which can be one of the block map types above) to
+ * set the correct block map while the agbno versions assume
+ * you want to use the regular block map.
+ */
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/*
+ * implemented as functions for debugging purposes
+ */
+int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno);
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+       xfs_agblock_t ag_blockno, int state);
+
+int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno);
+void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state);
+#else
+/*
+ * implemented as macros for performance purposes
+ */
+
+#define get_agbno_state(mp, agno, ag_blockno) \
+                       ((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \
+                                >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
+                               & XR_BB_MASK)
+#define set_agbno_state(mp, agno, ag_blockno, state) \
+       *(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
+               ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
+         (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
+        (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)))
+
+#define get_fsbno_state(mp, blockno) \
+               get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+                               XFS_FSB_TO_AGBNO(mp, (blockno)))
+#define set_fsbno_state(mp, blockno, state) \
+               set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+                       XFS_FSB_TO_AGBNO(mp, (blockno)), (state))
+
+
+#define get_agbno_rec(mp, agno, ag_blockno) \
+                       (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM))
+#endif /* XR_BMAP_TRACE */
+
+/*
+ * these work in real-time extents (e.g. fsbno == rt extent number)
+ */
+#define get_rtbno_state(mp, fsbno) \
+                       ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
+                       (((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)
+#define set_rtbno_state(mp, fsbno, state) \
+       *(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
+        ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
+         (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
+        (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB)))
 
-static inline int
-get_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno)
-{
-       return get_bmap_ext(agno, agbno, agbno + 1, NULL);
-}
 
 /*
  * extent tree definitions
@@ -170,11 +227,23 @@ get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
 /*
  * duplicate extent tree functions
  */
+void           add_dup_extent(xfs_agnumber_t agno,
+                               xfs_agblock_t startblock,
+                               xfs_extlen_t blockcount);
+
+extern avltree_desc_t   **extent_tree_ptrs;
+/* ARGSUSED */
+static inline int
+search_dup_extent(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
+{
+       ASSERT(agno < glob_agcount);
+
+       if (avl_findrange(extent_tree_ptrs[agno], agbno) != NULL)
+               return(1);
+
+       return(0);
+}
 
-int            add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
-                       xfs_extlen_t blockcount);
-int            search_dup_extent(xfs_agnumber_t agno,
-                       xfs_agblock_t start_agbno, xfs_agblock_t end_agbno);
 void           add_rt_dup_extent(xfs_drtbno_t  startblock,
                                xfs_extlen_t    blockcount);
 
@@ -202,7 +271,6 @@ void                release_agbcnt_extent_tree(xfs_agnumber_t agno);
  */
 void           free_rt_dup_extent_tree(xfs_mount_t *mp);
 
-void           incore_ext_init(xfs_mount_t *);
 /*
  * per-AG extent trees shutdown routine -- all (bno, bcnt and dup)
  * at once.  this one actually frees the memory instead of just recyling
@@ -210,8 +278,6 @@ void                incore_ext_init(xfs_mount_t *);
  */
 void           incore_ext_teardown(xfs_mount_t *mp);
 
-void           incore_ino_init(xfs_mount_t *);
-
 /*
  * inode definitions
  */
index a362e5a6ae70d3ac47636ec9b4f955820c9c7dee..d0b8cdc4ba20a2a78326d201c7bcda66e3966968 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <libxfs.h>
 #include "avl.h"
-#include "btree.h"
 #include "globals.h"
 #include "incore.h"
 #include "agheader.h"
@@ -73,8 +72,8 @@ static rt_ext_flist_t rt_ext_flist;
 
 static avl64tree_desc_t        *rt_ext_tree_ptr;       /* dup extent tree for rt */
 
-static struct btree_root **dup_extent_trees;   /* per ag dup extent trees */
-
+avltree_desc_t **extent_tree_ptrs;             /* array of extent tree ptrs */
+                                               /* one per ag for dups */
 static avltree_desc_t  **extent_bno_ptrs;      /*
                                                 * array of extent tree ptrs
                                                 * one per ag for free extents
@@ -100,48 +99,6 @@ static pthread_mutex_t      ext_flist_lock;
 static pthread_mutex_t rt_ext_tree_lock;
 static pthread_mutex_t rt_ext_flist_lock;
 
-/*
- * duplicate extent tree functions
- */
-
-void
-release_dup_extent_tree(
-       xfs_agnumber_t          agno)
-{
-       btree_clear(dup_extent_trees[agno]);
-}
-
-int
-add_dup_extent(
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           startblock,
-       xfs_extlen_t            blockcount)
-{
-#ifdef XR_DUP_TRACE
-       fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock,
-               blockcount);
-#endif
-       return btree_insert(dup_extent_trees[agno], startblock,
-                               (void *)(uintptr_t)(startblock + blockcount));
-}
-
-int
-search_dup_extent(
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           start_agbno,
-       xfs_agblock_t           end_agbno)
-{
-       unsigned long   bno;
-
-       if (!btree_find(dup_extent_trees[agno], start_agbno, &bno))
-               return 0;       /* this really shouldn't happen */
-       if (bno < end_agbno)
-               return 1;
-       return (uintptr_t)btree_peek_prev(dup_extent_trees[agno], NULL) >
-                                                               start_agbno;
-}
-
-
 /*
  * extent tree stuff is avl trees of duplicate extents,
  * sorted in order by block number.  there is one tree per ag.
@@ -253,6 +210,14 @@ release_extent_tree(avltree_desc_t *tree)
 /*
  * top-level (visible) routines
  */
+void
+release_dup_extent_tree(xfs_agnumber_t agno)
+{
+       release_extent_tree(extent_tree_ptrs[agno]);
+
+       return;
+}
+
 void
 release_agbno_extent_tree(xfs_agnumber_t agno)
 {
@@ -557,6 +522,93 @@ get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
        return(ext);
 }
 
+/*
+ * the next 2 routines manage the trees of duplicate extents -- 1 tree
+ * per AG
+ */
+void
+add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+               xfs_extlen_t blockcount)
+{
+       extent_tree_node_t *first, *last, *ext, *next_ext;
+       xfs_agblock_t new_startblock;
+       xfs_extlen_t new_blockcount;
+
+       ASSERT(agno < glob_agcount);
+
+#ifdef XR_DUP_TRACE
+       fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock, blockcount);
+#endif
+       avl_findranges(extent_tree_ptrs[agno], startblock - 1,
+               startblock + blockcount + 1,
+               (avlnode_t **) &first, (avlnode_t **) &last);
+       /*
+        * find adjacent and overlapping extent blocks
+        */
+       if (first == NULL && last == NULL)  {
+               /* nothing, just make and insert new extent */
+
+               ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_MULT);
+
+               if (avl_insert(extent_tree_ptrs[agno],
+                               (avlnode_t *) ext) == NULL)  {
+                       do_error(_("duplicate extent range\n"));
+               }
+
+               return;
+       }
+
+       ASSERT(first != NULL && last != NULL);
+
+       /*
+        * find the new composite range, delete old extent nodes
+        * as we go
+        */
+       new_startblock = startblock;
+       new_blockcount = blockcount;
+
+       for (ext = first;
+               ext != (extent_tree_node_t *) last->avl_node.avl_nextino;
+               ext = next_ext)  {
+               /*
+                * preserve the next inorder node
+                */
+               next_ext = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+               /*
+                * just bail if the new extent is contained within an old one
+                */
+               if (ext->ex_startblock <= startblock &&
+                               ext->ex_blockcount >= blockcount)
+                       return;
+               /*
+                * now check for overlaps and adjacent extents
+                */
+               if (ext->ex_startblock + ext->ex_blockcount >= startblock
+                       || ext->ex_startblock <= startblock + blockcount)  {
+
+                       if (ext->ex_startblock < new_startblock)
+                               new_startblock = ext->ex_startblock;
+
+                       if (ext->ex_startblock + ext->ex_blockcount >
+                                       new_startblock + new_blockcount)
+                               new_blockcount = ext->ex_startblock +
+                                                       ext->ex_blockcount -
+                                                       new_startblock;
+
+                       avl_delete(extent_tree_ptrs[agno], (avlnode_t *) ext);
+                       continue;
+               }
+       }
+
+       ext = mk_extent_tree_nodes(new_startblock, new_blockcount, XR_E_MULT);
+
+       if (avl_insert(extent_tree_ptrs[agno], (avlnode_t *) ext) == NULL)  {
+               do_error(_("duplicate extent range\n"));
+       }
+
+       return;
+}
+
 static __psunsigned_t
 avl_ext_start(avlnode_t *node)
 {
@@ -852,9 +904,10 @@ incore_ext_init(xfs_mount_t *mp)
        pthread_mutex_init(&rt_ext_tree_lock, NULL);
        pthread_mutex_init(&rt_ext_flist_lock, NULL);
 
-       dup_extent_trees = calloc(agcount, sizeof(struct btree_root *));
-       if (!dup_extent_trees)
-               do_error(_("couldn't malloc dup extent tree descriptor table\n"));
+       if ((extent_tree_ptrs = malloc(agcount *
+                                       sizeof(avltree_desc_t *))) == NULL)
+               do_error(
+       _("couldn't malloc dup extent tree descriptor table\n"));
 
        if ((extent_bno_ptrs = malloc(agcount *
                                        sizeof(avltree_desc_t *))) == NULL)
@@ -867,6 +920,10 @@ incore_ext_init(xfs_mount_t *mp)
        _("couldn't malloc free by-bcnt extent tree descriptor table\n"));
 
        for (i = 0; i < agcount; i++)  {
+               if ((extent_tree_ptrs[i] =
+                               malloc(sizeof(avltree_desc_t))) == NULL)
+                       do_error(
+                       _("couldn't malloc dup extent tree descriptor\n"));
                if ((extent_bno_ptrs[i] =
                                malloc(sizeof(avltree_desc_t))) == NULL)
                        do_error(
@@ -878,7 +935,7 @@ incore_ext_init(xfs_mount_t *mp)
        }
 
        for (i = 0; i < agcount; i++)  {
-               btree_init(&dup_extent_trees[i]);
+               avl_init_tree(extent_tree_ptrs[i], &avl_extent_tree_ops);
                avl_init_tree(extent_bno_ptrs[i], &avl_extent_tree_ops);
                avl_init_tree(extent_bcnt_ptrs[i], &avl_extent_bcnt_tree_ops);
        }
@@ -907,18 +964,18 @@ incore_ext_teardown(xfs_mount_t *mp)
                free(cur);
 
        for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
-               btree_destroy(dup_extent_trees[i]);
+               free(extent_tree_ptrs[i]);
                free(extent_bno_ptrs[i]);
                free(extent_bcnt_ptrs[i]);
        }
 
-       free(dup_extent_trees);
        free(extent_bcnt_ptrs);
        free(extent_bno_ptrs);
+       free(extent_tree_ptrs);
 
-       dup_extent_trees = NULL;
-       extent_bcnt_ptrs = NULL;
-       extent_bno_ptrs = NULL;
+       extent_bcnt_ptrs = extent_bno_ptrs = extent_tree_ptrs = NULL;
+
+       return;
 }
 
 int
index 654c406f09ec5bc7f479acd8685f0d83c61b1722..7e5052c48acdd675b92f8f01b13b5deb0e7d0431 100644 (file)
 #include "pthread.h"
 #include "avl.h"
 #include "dir.h"
-#include "bmap.h"
 #include "incore.h"
 #include "prefetch.h"
+#include "radix-tree.h"
 #include <sys/resource.h>
 
-/* TODO: dirbuf/freemap key usage is completely b0rked - only used for dirv1 */
 static pthread_key_t dirbuf_key;
 static pthread_key_t dir_freemap_key;
 static pthread_key_t attr_freemap_key;
 
-extern pthread_key_t dblkmap_key;
-extern pthread_key_t ablkmap_key;
-
 static void
 ts_alloc(pthread_key_t key, unsigned n, size_t size)
 {
        void *voidp;
-       voidp = calloc(n, size);
+       voidp = malloc((n)*(size));
        if (voidp == NULL) {
                do_error(_("ts_alloc: cannot allocate thread specific storage\n"));
                /* NO RETURN */
@@ -57,9 +53,6 @@ ts_create(void)
        pthread_key_create(&dirbuf_key, NULL);
        pthread_key_create(&dir_freemap_key, NULL);
        pthread_key_create(&attr_freemap_key, NULL);
-
-       pthread_key_create(&dblkmap_key, NULL);
-       pthread_key_create(&ablkmap_key, NULL);
 }
 
 void
@@ -158,5 +151,5 @@ xfs_init(libxfs_init_t *args)
        ts_create();
        ts_init();
        increase_rlimit();
-       pftrace_init();
+       radix_tree_init();
 }
index b2f2d624db32caba4e790048c0f29d901263a903..170a1953f60c890c97f93f50c06142604eb78c4d 100644 (file)
@@ -109,6 +109,7 @@ void
 phase2(xfs_mount_t *mp)
 {
        xfs_agnumber_t          i;
+       xfs_agblock_t           b;
        int                     j;
        ino_tree_node_t         *ino_rec;
 
@@ -133,6 +134,12 @@ phase2(xfs_mount_t *mp)
 
        do_log(_("        - scan filesystem freespace and inode maps...\n"));
 
+       /*
+        * account for space used by ag headers and log if internal
+        */
+       set_bmap_log(mp);
+       set_bmap_fs(mp);
+
        bad_ino_btree = 0;
 
        set_progress_msg(PROG_FMT_SCAN_AG, (__uint64_t) glob_agcount);
@@ -168,8 +175,11 @@ phase2(xfs_mount_t *mp)
                /*
                 * also mark blocks
                 */
-               set_bmap_ext(0, XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
-                            mp->m_ialloc_blks, XR_E_INO);
+               for (b = 0; b < mp->m_ialloc_blks; b++)  {
+                       set_agbno_state(mp, 0,
+                               b + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
+                               XR_E_INO);
+               }
        } else  {
                do_log(_("        - found root inode chunk\n"));
 
index 32e855cb516273fdee8481ffff51f4e68889ea11..c36a1c56a599d9d05eeecd38aa58cfefa2747610 100644 (file)
@@ -61,8 +61,14 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
                                agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
 
                                pthread_mutex_lock(&ag_locks[agno]);
-                               state = get_bmap(agno, agbno);
-                               switch (state) {
+                               switch (state = get_agbno_state(mp,
+                                                       agno, agbno))  {
+                               case XR_E_UNKNOWN:
+                               case XR_E_FREE:
+                               case XR_E_FREE1:
+                                       set_agbno_state(mp, agno, agbno,
+                                               XR_E_INO);
+                                       break;
                                case XR_E_BAD_STATE:
                                        do_error(_(
                                                "bad state in block map %d\n"),
@@ -79,7 +85,8 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
                                         * anyway, hopefully without
                                         * losing too much other data
                                         */
-                                       set_bmap(agno, agbno, XR_E_INO);
+                                       set_agbno_state(mp, agno, agbno,
+                                               XR_E_INO);
                                        break;
                                }
                                pthread_mutex_unlock(&ag_locks[agno]);
index aaef1f9e63cfab47e38f1448b99aff86964255e6..ebea378836185373fd8043c7b141c8d2c6bce886 100644 (file)
@@ -192,7 +192,8 @@ phase4(xfs_mount_t *mp)
        xfs_agnumber_t          i;
        xfs_agblock_t           j;
        xfs_agblock_t           ag_end;
-       xfs_extlen_t            blen;
+       xfs_agblock_t           extent_start;
+       xfs_extlen_t            extent_len;
        int                     ag_hdr_len = 4 * mp->m_sb.sb_sectsize;
        int                     ag_hdr_block;
        int                     bstate;
@@ -225,13 +226,30 @@ phase4(xfs_mount_t *mp)
                ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
                        mp->m_sb.sb_dblocks -
                                (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
-
+               extent_start = extent_len = 0;
                /*
                 * set up duplicate extent list for this ag
                 */
-               for (j = ag_hdr_block; j < ag_end; j += blen)  {
-                       bstate = get_bmap_ext(i, j, ag_end, &blen);
-                       switch (bstate) {
+               for (j = ag_hdr_block; j < ag_end; j++)  {
+
+                       /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
+                       if ((extent_start == 0) && ((j & XR_BB_MASK) == 0)) {
+                               switch(ba_bmap[i][j>>XR_BB]) {
+                               case XR_E_UNKNOWN_LL:
+                               case XR_E_FREE1_LL:
+                               case XR_E_FREE_LL:
+                               case XR_E_INUSE_LL:
+                               case XR_E_INUSE_FS_LL:
+                               case XR_E_INO_LL:
+                               case XR_E_FS_MAP_LL:
+                                       j += (XR_BB_UNIT/XR_BB) - 1;
+                                       continue;
+                               }
+                       }
+
+                       bstate = get_agbno_state(mp, i, j);
+
+                       switch (bstate)  {
                        case XR_E_BAD_STATE:
                        default:
                                do_warn(
@@ -245,13 +263,37 @@ phase4(xfs_mount_t *mp)
                        case XR_E_INUSE_FS:
                        case XR_E_INO:
                        case XR_E_FS_MAP:
+                               if (extent_start == 0)
+                                       continue;
+                               else  {
+                                       /*
+                                        * add extent and reset extent state
+                                        */
+                                       add_dup_extent(i, extent_start,
+                                                       extent_len);
+                                       extent_start = 0;
+                                       extent_len = 0;
+                               }
                                break;
                        case XR_E_MULT:
-                               add_dup_extent(i, j, blen);
+                               if (extent_start == 0)  {
+                                       extent_start = j;
+                                       extent_len = 1;
+                               } else if (extent_len == MAXEXTLEN)  {
+                                       add_dup_extent(i, extent_start,
+                                                       extent_len);
+                                       extent_start = j;
+                                       extent_len = 1;
+                               } else
+                                       extent_len++;
                                break;
                        }
                }
-
+               /*
+                * catch tail-case, extent hitting the end of the ag
+                */
+               if (extent_start != 0)
+                       add_dup_extent(i, extent_start, extent_len);
                PROG_RPT_INC(prog_rpt_done[i], 1);
        }
        print_final_rpt();
@@ -263,7 +305,9 @@ phase4(xfs_mount_t *mp)
        rt_len = 0;
 
        for (bno = 0; bno < mp->m_sb.sb_rextents; bno++)  {
-               bstate = get_rtbmap(bno);
+
+               bstate = get_rtbno_state(mp, bno);
+
                switch (bstate)  {
                case XR_E_BAD_STATE:
                default:
@@ -314,7 +358,19 @@ phase4(xfs_mount_t *mp)
        /*
         * initialize bitmaps for all AGs
         */
-       reset_bmaps(mp);
+       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
+               /*
+                * now reset the bitmap for all ags
+                */
+               memset(ba_bmap[i], 0,
+                   roundup((mp->m_sb.sb_agblocks+(NBBY/XR_BB)-1)/(NBBY/XR_BB),
+                                               sizeof(__uint64_t)));
+               for (j = 0; j < ag_hdr_block; j++)
+                       set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+       }
+       set_bmap_rt(mp->m_sb.sb_rextents);
+       set_bmap_log(mp);
+       set_bmap_fs(mp);
 
        do_log(_("        - check for inodes claiming duplicate blocks...\n"));
        set_progress_msg(PROG_FMT_DUP_BLOCKS, (__uint64_t) mp->m_sb.sb_icount);
index d6a0f6a5347158bcb7f00157a366159119550718..26f5aa22cc41559fcd495a7155ee761c570cd026 100644 (file)
@@ -88,8 +88,10 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
        xfs_agblock_t           agbno;
        xfs_agblock_t           ag_end;
        uint                    free_blocks;
-       xfs_extlen_t            blen;
-       int                     bstate;
+#ifdef XR_BLD_FREE_TRACE
+       int                     old_state;
+       int                     state = XR_E_BAD_STATE;
+#endif
 
        /*
         * scan the bitmap for the ag looking for continuous
@@ -118,10 +120,30 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
         * ok, now find the number of extents, keep track of the
         * largest extent.
         */
-       for (agbno = 0; agbno < ag_end; agbno += blen) {
-               bstate = get_bmap_ext(agno, agbno, ag_end, &blen);
-               if (bstate < XR_E_INUSE)  {
-                       free_blocks += blen;
+       for (agbno = 0; agbno < ag_end; agbno++)  {
+#if 0
+               old_state = state;
+               state = get_agbno_state(mp, agno, agbno);
+               if (state != old_state)  {
+                       fprintf(stderr, "agbno %u - new state is %d\n",
+                                       agbno, state);
+               }
+#endif
+               /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
+               if ((in_extent == 0) && ((agbno & XR_BB_MASK) == 0)) {
+                       /* testing >= XR_E_INUSE */
+                       switch (ba_bmap[agno][agbno>>XR_BB]) {
+                       case XR_E_INUSE_LL:
+                       case XR_E_INUSE_FS_LL:
+                       case XR_E_INO_LL:
+                       case XR_E_FS_MAP_LL:
+                               agbno += (XR_BB_UNIT/XR_BB) - 1;
+                               continue;
+                       }
+
+               }
+               if (get_agbno_state(mp, agno, agbno) < XR_E_INUSE)  {
+                       free_blocks++;
                        if (in_extent == 0)  {
                                /*
                                 * found the start of a free extent
@@ -129,9 +151,9 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
                                in_extent = 1;
                                num_extents++;
                                extent_start = agbno;
-                               extent_len = blen;
+                               extent_len = 1;
                        } else  {
-                               extent_len += blen;
+                               extent_len++;
                        }
                } else   {
                        if (in_extent)  {
@@ -1442,6 +1464,11 @@ phase5_func(
                                agno);
                }
 
+               /*
+                * done with the AG bitmap, toss it...
+                */
+               teardown_ag_bmap(mp, agno);
+
                /*
                 * ok, now set up the btree cursors for the
                 * on-disk btrees (includs pre-allocating all
@@ -1628,6 +1655,7 @@ phase5(xfs_mount_t *mp)
                _("        - generate realtime summary info and bitmap...\n"));
                rtinit(mp);
                generate_rtinfo(mp, btmcompute, sumcompute);
+               teardown_rt_bmap(mp);
        }
 
        do_log(_("        - reset superblock...\n"));
index d0560631f9f02b98a651dc1248c71d815d06dfd4..becedbd2738c1a3f91ec682fff769b9d26e24f28 100644 (file)
@@ -3661,6 +3661,11 @@ phase6(xfs_mount_t *mp)
 
        do_log(_("Phase 6 - check inode connectivity...\n"));
 
+       if (!no_modify)
+               teardown_bmap_finish(mp);
+       else
+               teardown_bmap(mp);
+
        incore_ext_teardown(mp);
 
        add_ino_ex_data(mp);
index aaa56d181b92fd45792b1ca73d63beaad8278b41..2c78db0d73cf2f49ea54c5ef3f6894bd3c7e69d8 100644 (file)
@@ -1,7 +1,6 @@
 #include <libxfs.h>
 #include <pthread.h>
 #include "avl.h"
-#include "btree.h"
 #include "globals.h"
 #include "agheader.h"
 #include "incore.h"
@@ -15,6 +14,7 @@
 #include "threads.h"
 #include "prefetch.h"
 #include "progress.h"
+#include "radix-tree.h"
 
 int do_prefetch = 1;
 
@@ -83,8 +83,9 @@ pf_start_processing(
        prefetch_args_t         *args)
 {
        if (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
                pftrace("signalling processing for AG %d", args->agno);
-
+#endif
                args->can_start_processing = 1;
                pthread_cond_signal(&args->start_processing);
        }
@@ -95,8 +96,9 @@ pf_start_io_workers(
        prefetch_args_t         *args)
 {
        if (!args->can_start_reading) {
+#ifdef XR_PF_TRACE
                pftrace("signalling reading for AG %d", args->agno);
-
+#endif
                args->can_start_reading = 1;
                pthread_cond_broadcast(&args->start_reading);
        }
@@ -126,24 +128,35 @@ pf_queue_io(
 
        pthread_mutex_lock(&args->lock);
 
-       btree_insert(args->io_queue, fsbno, bp);
-
        if (fsbno > args->last_bno_read) {
-               if (B_IS_INODE(flag)) {
+               radix_tree_insert(&args->primary_io_queue, fsbno, bp);
+               if (!B_IS_INODE(flag))
+                       radix_tree_tag_set(&args->primary_io_queue, fsbno, 0);
+               else {
                        args->inode_bufs_queued++;
                        if (args->inode_bufs_queued == IO_THRESHOLD)
                                pf_start_io_workers(args);
                }
+#ifdef XR_PF_TRACE
+               pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+                       "primary queue (inode_bufs_queued = %d, last_bno = %lu)",
+                       B_IS_INODE(flag) ? 'I' : 'M', bp,
+                       (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+                       args->inode_bufs_queued, args->last_bno_read);
+#endif
        } else {
+#ifdef XR_PF_TRACE
+               pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+                       "secondary queue (last_bno = %lu)",
+                       B_IS_INODE(flag) ? 'I' : 'M', bp,
+                       (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+                       args->last_bno_read);
+#endif
                ASSERT(!B_IS_INODE(flag));
                XFS_BUF_SET_PRIORITY(bp, B_DIR_META_2);
+               radix_tree_insert(&args->secondary_io_queue, fsbno, bp);
        }
 
-       pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to queue"
-               "(inode_bufs_queued = %d, last_bno = %lu)", B_IS_INODE(flag) ?
-               'I' : 'M', bp, (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
-               args->inode_bufs_queued, args->last_bno_read);
-
        pf_start_processing(args);
 
        pthread_mutex_unlock(&args->lock);
@@ -181,9 +194,9 @@ pf_read_bmbt_reclist(
 
                while (irec.br_blockcount) {
                        unsigned int    len;
-
+#ifdef XR_PF_TRACE
                        pftrace("queuing dir extent in AG %d", args->agno);
-
+#endif
                        len = (irec.br_blockcount > mp->m_dirblkfsbs) ?
                                        mp->m_dirblkfsbs : irec.br_blockcount;
                        pf_queue_io(args, irec.br_startblock, len, B_DIR_META);
@@ -394,6 +407,7 @@ pf_batch_read(
        pf_which_t              which,
        void                    *buf)
 {
+       struct radix_tree_root  *queue;
        xfs_buf_t               *bplist[MAX_BUFS];
        unsigned int            num;
        off64_t                 first_off, last_off, next_off;
@@ -401,28 +415,27 @@ pf_batch_read(
        int                     i;
        int                     inode_bufs;
        unsigned long           fsbno;
-       unsigned long           max_fsbno;
        char                    *pbuf;
 
-       for (;;) {
-               num = 0;
-               if (which == PF_SECONDARY) {
-                       bplist[0] = btree_find(args->io_queue, 0, &fsbno);
-                       max_fsbno = MIN(fsbno + pf_max_fsbs,
-                                                       args->last_bno_read);
+       queue = (which != PF_SECONDARY) ? &args->primary_io_queue
+                               : &args->secondary_io_queue;
+
+       while (radix_tree_lookup_first(queue, &fsbno) != NULL) {
+
+               if (which != PF_META_ONLY) {
+                       num = radix_tree_gang_lookup_ex(queue,
+                                       (void**)&bplist[0], fsbno,
+                                       fsbno + pf_max_fsbs, MAX_BUFS);
+                       ASSERT(num > 0);
+                       ASSERT(XFS_FSB_TO_DADDR(mp, fsbno) ==
+                               XFS_BUF_ADDR(bplist[0]));
                } else {
-                       bplist[0] = btree_find(args->io_queue,
-                                               args->last_bno_read, &fsbno);
-                       max_fsbno = fsbno + pf_max_fsbs;
-               }
-               while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) {
-                       if (which != PF_META_ONLY ||
-                           !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
-                               num++;
-                       bplist[num] = btree_lookup_next(args->io_queue, &fsbno);
+                       num = radix_tree_gang_lookup_tag(queue,
+                                       (void**)&bplist[0], fsbno,
+                                       MAX_BUFS / 4, 0);
+                       if (num == 0)
+                               return;
                }
-               if (!num)
-                       return;
 
                /*
                 * do a big read if 25% of the potential buffer is useful,
@@ -431,22 +444,21 @@ pf_batch_read(
                 */
                first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0]));
                last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
-                                               XFS_BUF_SIZE(bplist[num-1]);
+                       XFS_BUF_SIZE(bplist[num-1]);
                while (last_off - first_off > pf_max_bytes) {
                        num--;
-                       last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(
-                               bplist[num-1])) + XFS_BUF_SIZE(bplist[num-1]);
+                       last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
+                               XFS_BUF_SIZE(bplist[num-1]);
                }
-               if (num < ((last_off - first_off) >>
-                                               (mp->m_sb.sb_blocklog + 3))) {
+               if (num < ((last_off - first_off) >> (mp->m_sb.sb_blocklog + 3))) {
                        /*
                         * not enough blocks for one big read, so determine
                         * the number of blocks that are close enough.
                         */
                        last_off = first_off + XFS_BUF_SIZE(bplist[0]);
                        for (i = 1; i < num; i++) {
-                               next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(
-                                       bplist[i])) + XFS_BUF_SIZE(bplist[i]);
+                               next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) +
+                                               XFS_BUF_SIZE(bplist[i]);
                                if (next_off - last_off > pf_batch_bytes)
                                        break;
                                last_off = next_off;
@@ -455,7 +467,7 @@ pf_batch_read(
                }
 
                for (i = 0; i < num; i++) {
-                       if (btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp,
+                       if (radix_tree_delete(queue, XFS_DADDR_TO_FSB(mp,
                                        XFS_BUF_ADDR(bplist[i]))) == NULL)
                                do_error(_("prefetch corruption\n"));
                }
@@ -509,16 +521,20 @@ pf_batch_read(
                        }
                }
                for (i = 0; i < num; i++) {
+#ifdef XR_PF_TRACE
                        pftrace("putbuf %c %p (%llu) in AG %d",
                                B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])) ? 'I' : 'M',
                                bplist[i], (long long)XFS_BUF_ADDR(bplist[i]),
                                args->agno);
+#endif
                        libxfs_putbuf(bplist[i]);
                }
                pthread_mutex_lock(&args->lock);
                if (which != PF_SECONDARY) {
+#ifdef XR_PF_TRACE
                        pftrace("inode_bufs_queued for AG %d = %d", args->agno,
                                args->inode_bufs_queued);
+#endif
                        /*
                         * if primary inode queue running low, process metadata
                         * in boths queues to avoid I/O starvation as the
@@ -527,14 +543,15 @@ pf_batch_read(
                         */
                        if (which == PF_PRIMARY && !args->queuing_done &&
                                        args->inode_bufs_queued < IO_THRESHOLD) {
+#ifdef XR_PF_TRACE
                                pftrace("reading metadata bufs from primary queue for AG %d",
                                        args->agno);
-
+#endif
                                pf_batch_read(args, PF_META_ONLY, buf);
-
+#ifdef XR_PF_TRACE
                                pftrace("reading bufs from secondary queue for AG %d",
                                        args->agno);
-
+#endif
                                pf_batch_read(args, PF_SECONDARY, buf);
                        }
                }
@@ -553,20 +570,22 @@ pf_io_worker(
                return NULL;
 
        pthread_mutex_lock(&args->lock);
-       while (!args->queuing_done || btree_find(args->io_queue, 0, NULL)) {
+       while (!args->queuing_done || args->primary_io_queue.height) {
 
+#ifdef XR_PF_TRACE
                pftrace("waiting to start prefetch I/O for AG %d", args->agno);
-
+#endif
                while (!args->can_start_reading && !args->queuing_done)
                        pthread_cond_wait(&args->start_reading, &args->lock);
-
+#ifdef XR_PF_TRACE
                pftrace("starting prefetch I/O for AG %d", args->agno);
-
+#endif
                pf_batch_read(args, PF_PRIMARY, buf);
                pf_batch_read(args, PF_SECONDARY, buf);
 
+#ifdef XR_PF_TRACE
                pftrace("ran out of bufs to prefetch for AG %d", args->agno);
-
+#endif
                if (!args->queuing_done)
                        args->can_start_reading = 0;
        }
@@ -574,8 +593,9 @@ pf_io_worker(
 
        free(buf);
 
+#ifdef XR_PF_TRACE
        pftrace("finished prefetch I/O for AG %d", args->agno);
-
+#endif
        return NULL;
 }
 
@@ -617,7 +637,10 @@ pf_queuing_worker(
                        break;
                }
        }
+
+#ifdef XR_PF_TRACE
        pftrace("starting prefetch for AG %d", args->agno);
+#endif
 
        for (irec = findfirst_inode_rec(args->agno); irec != NULL;
                        irec = next_ino_rec(irec)) {
@@ -654,9 +677,10 @@ pf_queuing_worker(
 
        pthread_mutex_lock(&args->lock);
 
+#ifdef XR_PF_TRACE
        pftrace("finished queuing inodes for AG %d (inode_bufs_queued = %d)",
                args->agno, args->inode_bufs_queued);
-
+#endif
        args->queuing_done = 1;
        pf_start_io_workers(args);
        pf_start_processing(args);
@@ -667,11 +691,13 @@ pf_queuing_worker(
                if (args->io_threads[i])
                        pthread_join(args->io_threads[i], NULL);
 
+#ifdef XR_PF_TRACE
        pftrace("prefetch for AG %d finished", args->agno);
-
+#endif
        pthread_mutex_lock(&args->lock);
 
-       ASSERT(btree_find(args->io_queue, 0, NULL) == NULL);
+       ASSERT(args->primary_io_queue.height == 0);
+       ASSERT(args->secondary_io_queue.height == 0);
 
        args->prefetch_done = 1;
        if (args->next_args)
@@ -688,8 +714,9 @@ pf_create_prefetch_thread(
 {
        int                     err;
 
+#ifdef XR_PF_TRACE
        pftrace("creating queue thread for AG %d", args->agno);
-
+#endif
        err = pthread_create(&args->queuing_thread, NULL,
                        pf_queuing_worker, args);
        if (err != 0) {
@@ -728,7 +755,8 @@ start_inode_prefetch(
 
        args = calloc(1, sizeof(prefetch_args_t));
 
-       btree_init(&args->io_queue);
+       INIT_RADIX_TREE(&args->primary_io_queue, 0);
+       INIT_RADIX_TREE(&args->secondary_io_queue, 0);
        if (pthread_mutex_init(&args->lock, NULL) != 0)
                do_error(_("failed to initialize prefetch mutex\n"));
        if (pthread_cond_init(&args->start_reading, NULL) != 0)
@@ -776,12 +804,14 @@ wait_for_inode_prefetch(
        pthread_mutex_lock(&args->lock);
 
        while (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
                pftrace("waiting to start processing AG %d", args->agno);
-
+#endif
                pthread_cond_wait(&args->start_processing, &args->lock);
        }
+#ifdef XR_PF_TRACE
        pftrace("can start processing AG %d", args->agno);
-
+#endif
        pthread_mutex_unlock(&args->lock);
 }
 
@@ -792,39 +822,25 @@ cleanup_inode_prefetch(
        if (args == NULL)
                return;
 
+#ifdef XR_PF_TRACE
        pftrace("waiting AG %d prefetch to finish", args->agno);
-
+#endif
        if (args->queuing_thread)
                pthread_join(args->queuing_thread, NULL);
 
+#ifdef XR_PF_TRACE
        pftrace("AG %d prefetch done", args->agno);
-
+#endif
        pthread_mutex_destroy(&args->lock);
        pthread_cond_destroy(&args->start_reading);
        pthread_cond_destroy(&args->start_processing);
        sem_destroy(&args->ra_count);
-       btree_destroy(args->io_queue);
 
        free(args);
 }
 
 #ifdef XR_PF_TRACE
 
-static FILE    *pf_trace_file;
-
-void
-pftrace_init(void)
-{
-       pf_trace_file = fopen("/tmp/xfs_repair_prefetch.trace", "w");
-       setvbuf(pf_trace_file, NULL, _IOLBF, 1024);
-}
-
-void
-pftrace_done(void)
-{
-       fclose(pf_trace_file);
-}
-
 void
 _pftrace(const char *func, const char *msg, ...)
 {
@@ -839,8 +855,7 @@ _pftrace(const char *func, const char *msg, ...)
        buf[sizeof(buf)-1] = '\0';
        va_end(args);
 
-       fprintf(pf_trace_file, "%lu.%06lu  %s: %s\n", tv.tv_sec, tv.tv_usec,
-               func, buf);
+       fprintf(pf_trace_file, "%lu.%06lu  %s: %s\n", tv.tv_sec, tv.tv_usec, func, buf);
 }
 
 #endif
index 44a406c0198bcb893717d11666537501b5df7071..60ba96646e17bdcead8eb1b37bc0bbc873a2d347 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <semaphore.h>
 #include "incore.h"
+#include "radix-tree.h"
 
 
 extern int     do_prefetch;
@@ -13,7 +14,8 @@ typedef struct prefetch_args {
        pthread_mutex_t         lock;
        pthread_t               queuing_thread;
        pthread_t               io_threads[PF_THREAD_COUNT];
-       struct btree_root       *io_queue;
+       struct radix_tree_root  primary_io_queue;
+       struct radix_tree_root  secondary_io_queue;
        pthread_cond_t          start_reading;
        pthread_cond_t          start_processing;
        int                     agno;
@@ -50,15 +52,8 @@ cleanup_inode_prefetch(
 
 
 #ifdef XR_PF_TRACE
-void   pftrace_init(void);
-void   pftrace_done(void);
-
 #define pftrace(msg...)        _pftrace(__FUNCTION__, ## msg)
 void   _pftrace(const char *, const char *, ...);
-#else
-static inline void pftrace_init(void) { };
-static inline void pftrace_done(void) { };
-static inline void pftrace(const char *msg, ...) { };
 #endif
 
 #endif /* _XFS_REPAIR_PREFETCH_H */
diff --git a/repair/radix-tree.c b/repair/radix-tree.c
new file mode 100644 (file)
index 0000000..36a6324
--- /dev/null
@@ -0,0 +1,805 @@
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <libxfs.h>
+#include "radix-tree.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define RADIX_TREE_MAP_SHIFT   6
+#define RADIX_TREE_MAP_SIZE    (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK    (RADIX_TREE_MAP_SIZE-1)
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_TAG_LONGS   \
+       ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#endif
+
+struct radix_tree_node {
+       unsigned int    count;
+       void            *slots[RADIX_TREE_MAP_SIZE];
+#ifdef RADIX_TREE_TAGS
+       unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
+#endif
+};
+
+struct radix_tree_path {
+       struct radix_tree_node *node;
+       int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH];
+
+/*
+ * Radix tree node cache.
+ */
+
+#define radix_tree_node_alloc(r)       ((struct radix_tree_node *) \
+               calloc(1, sizeof(struct radix_tree_node)))
+#define radix_tree_node_free(n)        free(n)
+
+#ifdef RADIX_TREE_TAGS
+
+static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
+               int offset)
+{
+       *((__uint32_t *)node->tags[tag] + (offset >> 5)) |= (1 << (offset & 31));
+}
+
+static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
+               int offset)
+{
+       __uint32_t      *p = (__uint32_t*)node->tags[tag] + (offset >> 5);
+       __uint32_t      m = 1 << (offset & 31);
+       *p &= ~m;
+}
+
+static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+               int offset)
+{
+       return 1 & (((const __uint32_t *)node->tags[tag])[offset >> 5] >> (offset & 31));
+}
+
+/*
+ * Returns 1 if any slot in the node has this tag set.
+ * Otherwise returns 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+       int idx;
+       for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+               if (node->tags[tag][idx])
+                       return 1;
+       }
+       return 0;
+}
+
+#endif
+
+/*
+ *     Return the maximum key which can be store into a
+ *     radix tree with height HEIGHT.
+ */
+static inline unsigned long radix_tree_maxindex(unsigned int height)
+{
+       return height_to_maxindex[height];
+}
+
+/*
+ *     Extend a radix tree so it can store key @index.
+ */
+static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
+{
+       struct radix_tree_node *node;
+       unsigned int height;
+#ifdef RADIX_TREE_TAGS
+       char tags[RADIX_TREE_MAX_TAGS];
+       int tag;
+#endif
+
+       /* Figure out what the height should be.  */
+       height = root->height + 1;
+       while (index > radix_tree_maxindex(height))
+               height++;
+
+       if (root->rnode == NULL) {
+               root->height = height;
+               goto out;
+       }
+
+#ifdef RADIX_TREE_TAGS
+       /*
+        * Prepare the tag status of the top-level node for propagation
+        * into the newly-pushed top-level node(s)
+        */
+       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+               tags[tag] = 0;
+               if (any_tag_set(root->rnode, tag))
+                       tags[tag] = 1;
+       }
+#endif
+       do {
+               if (!(node = radix_tree_node_alloc(root)))
+                       return -ENOMEM;
+
+               /* Increase the height.  */
+               node->slots[0] = root->rnode;
+
+#ifdef RADIX_TREE_TAGS
+               /* Propagate the aggregated tag info into the new root */
+               for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+                       if (tags[tag])
+                               tag_set(node, tag, 0);
+               }
+#endif
+               node->count = 1;
+               root->rnode = node;
+               root->height++;
+       } while (height > root->height);
+out:
+       return 0;
+}
+
+/**
+ *     radix_tree_insert    -    insert into a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *     @item:          item to insert
+ *
+ *     Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+                       unsigned long index, void *item)
+{
+       struct radix_tree_node *node = NULL, *slot;
+       unsigned int height, shift;
+       int offset;
+       int error;
+
+       /* Make sure the tree is high enough.  */
+       if ((!index && !root->rnode) ||
+                       index > radix_tree_maxindex(root->height)) {
+               error = radix_tree_extend(root, index);
+               if (error)
+                       return error;
+       }
+
+       slot = root->rnode;
+       height = root->height;
+       shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+       offset = 0;                     /* uninitialised var warning */
+       do {
+               if (slot == NULL) {
+                       /* Have to add a child node.  */
+                       if (!(slot = radix_tree_node_alloc(root)))
+                               return -ENOMEM;
+                       if (node) {
+                               node->slots[offset] = slot;
+                               node->count++;
+                       } else
+                               root->rnode = slot;
+               }
+
+               /* Go a level down */
+               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+               node = slot;
+               slot = node->slots[offset];
+               shift -= RADIX_TREE_MAP_SHIFT;
+               height--;
+       } while (height > 0);
+
+       if (slot != NULL)
+               return -EEXIST;
+
+       ASSERT(node);
+       node->count++;
+       node->slots[offset] = item;
+#ifdef RADIX_TREE_TAGS
+       ASSERT(!tag_get(node, 0, offset));
+       ASSERT(!tag_get(node, 1, offset));
+#endif
+       return 0;
+}
+
+static inline void **__lookup_slot(struct radix_tree_root *root,
+                                  unsigned long index)
+{
+       unsigned int height, shift;
+       struct radix_tree_node **slot;
+
+       height = root->height;
+       if (index > radix_tree_maxindex(height))
+               return NULL;
+
+       shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+       slot = &root->rnode;
+
+       while (height > 0) {
+               if (*slot == NULL)
+                       return NULL;
+
+               slot = (struct radix_tree_node **)
+                       ((*slot)->slots +
+                               ((index >> shift) & RADIX_TREE_MAP_MASK));
+               shift -= RADIX_TREE_MAP_SHIFT;
+               height--;
+       }
+
+       return (void **)slot;
+}
+
+/**
+ *     radix_tree_lookup_slot    -    lookup a slot in a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *
+ *     Lookup the slot corresponding to the position @index in the radix tree
+ *     @root. This is useful for update-if-exists operations.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+       return __lookup_slot(root, index);
+}
+
+/**
+ *     radix_tree_lookup    -    perform lookup operation on a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *
+ *     Lookup the item at the position @index in the radix tree @root.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+       void **slot;
+
+       slot = __lookup_slot(root, index);
+       return slot != NULL ? *slot : NULL;
+}
+
+/**
+ *     raid_tree_first_key - find the first index key in the radix tree
+ *     @root:          radix tree root
+ *     @index:         where the first index will be placed
+ *
+ *     Returns the first entry and index key in the radix tree @root.
+ */
+void *radix_tree_lookup_first(struct radix_tree_root *root, unsigned long *index)
+{
+       unsigned int height, shift;
+       struct radix_tree_node *slot;
+       unsigned long i;
+
+       height = root->height;
+       *index = 0;
+       if (height == 0)
+               return NULL;
+
+       shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+       slot = root->rnode;
+
+       for (; height > 1; height--) {
+               for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+                       if (slot->slots[i] != NULL)
+                               break;
+               }
+               ASSERT(i < RADIX_TREE_MAP_SIZE);
+
+               *index |= (i << shift);
+               shift -= RADIX_TREE_MAP_SHIFT;
+               slot = slot->slots[i];
+       }
+       for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+               if (slot->slots[i] != NULL) {
+                       *index |= i;
+                       return slot->slots[i];
+               }
+       }
+       return NULL;
+}
+
+#ifdef RADIX_TREE_TAGS
+
+/**
+ *     radix_tree_tag_set - set a tag on a radix tree node
+ *     @root:          radix tree root
+ *     @index:         index key
+ *     @tag:           tag index
+ *
+ *     Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ *     corresponding to @index in the radix tree.  From
+ *     the root all the way down to the leaf node.
+ *
+ *     Returns the address of the tagged item.   Setting a tag on a not-present
+ *     item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+                       unsigned long index, unsigned int tag)
+{
+       unsigned int height, shift;
+       struct radix_tree_node *slot;
+
+       height = root->height;
+       if (index > radix_tree_maxindex(height))
+               return NULL;
+
+       shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+       slot = root->rnode;
+
+       while (height > 0) {
+               int offset;
+
+               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+               if (!tag_get(slot, tag, offset))
+                       tag_set(slot, tag, offset);
+               slot = slot->slots[offset];
+               ASSERT(slot != NULL);
+               shift -= RADIX_TREE_MAP_SHIFT;
+               height--;
+       }
+
+       return slot;
+}
+
+/**
+ *     radix_tree_tag_clear - clear a tag on a radix tree node
+ *     @root:          radix tree root
+ *     @index:         index key
+ *     @tag:           tag index
+ *
+ *     Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ *     corresponding to @index in the radix tree.  If
+ *     this causes the leaf node to have no tags set then clear the tag in the
+ *     next-to-leaf node, etc.
+ *
+ *     Returns the address of the tagged item on success, else NULL.  ie:
+ *     has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+                       unsigned long index, unsigned int tag)
+{
+       struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+       struct radix_tree_node *slot;
+       unsigned int height, shift;
+       void *ret = NULL;
+
+       height = root->height;
+       if (index > radix_tree_maxindex(height))
+               goto out;
+
+       shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+       pathp->node = NULL;
+       slot = root->rnode;
+
+       while (height > 0) {
+               int offset;
+
+               if (slot == NULL)
+                       goto out;
+
+               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+               pathp[1].offset = offset;
+               pathp[1].node = slot;
+               slot = slot->slots[offset];
+               pathp++;
+               shift -= RADIX_TREE_MAP_SHIFT;
+               height--;
+       }
+
+       ret = slot;
+       if (ret == NULL)
+               goto out;
+
+       do {
+               if (!tag_get(pathp->node, tag, pathp->offset))
+                       goto out;
+               tag_clear(pathp->node, tag, pathp->offset);
+               if (any_tag_set(pathp->node, tag))
+                       goto out;
+               pathp--;
+       } while (pathp->node);
+out:
+       return ret;
+}
+
+#endif
+
+static unsigned int
+__lookup(struct radix_tree_root *root, void **results, unsigned long index,
+       unsigned int max_items, unsigned long *next_index)
+{
+       unsigned int nr_found = 0;
+       unsigned int shift, height;
+       struct radix_tree_node *slot;
+       unsigned long i;
+
+       height = root->height;
+       if (height == 0)
+               goto out;
+
+       shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+       slot = root->rnode;
+
+       for ( ; height > 1; height--) {
+
+               for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
+                               i < RADIX_TREE_MAP_SIZE; i++) {
+                       if (slot->slots[i] != NULL)
+                               break;
+                       index &= ~((1UL << shift) - 1);
+                       index += 1UL << shift;
+                       if (index == 0)
+                               goto out;       /* 32-bit wraparound */
+               }
+               if (i == RADIX_TREE_MAP_SIZE)
+                       goto out;
+
+               shift -= RADIX_TREE_MAP_SHIFT;
+               slot = slot->slots[i];
+       }
+
+       /* Bottom level: grab some items */
+       for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+               index++;
+               if (slot->slots[i]) {
+                       results[nr_found++] = slot->slots[i];
+                       if (nr_found == max_items)
+                               goto out;
+               }
+       }
+out:
+       *next_index = index;
+       return nr_found;
+}
+
+/**
+ *     radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ *     @root:          radix tree root
+ *     @results:       where the results of the lookup are placed
+ *     @first_index:   start the lookup from this key
+ *     @max_items:     place up to this many items at *results
+ *
+ *     Performs an index-ascending scan of the tree for present items.  Places
+ *     them at *@results and returns the number of items which were placed at
+ *     *@results.
+ *
+ *     The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned int max_items)
+{
+       const unsigned long max_index = radix_tree_maxindex(root->height);
+       unsigned long cur_index = first_index;
+       unsigned int ret = 0;
+
+       while (ret < max_items) {
+               unsigned int nr_found;
+               unsigned long next_index;       /* Index of next search */
+
+               if (cur_index > max_index)
+                       break;
+               nr_found = __lookup(root, results + ret, cur_index,
+                                       max_items - ret, &next_index);
+               ret += nr_found;
+               if (next_index == 0)
+                       break;
+               cur_index = next_index;
+       }
+       return ret;
+}
+
+/**
+ *     radix_tree_gang_lookup_ex - perform multiple lookup on a radix tree
+ *     @root:          radix tree root
+ *     @results:       where the results of the lookup are placed
+ *     @first_index:   start the lookup from this key
+ *     @last_index:    don't lookup past this key
+ *     @max_items:     place up to this many items at *results
+ *
+ *     Performs an index-ascending scan of the tree for present items starting
+ *     @first_index until @last_index up to as many as @max_items.  Places
+ *     them at *@results and returns the number of items which were placed
+ *     at *@results.
+ *
+ *     The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned long last_index,
+                       unsigned int max_items)
+{
+       const unsigned long max_index = radix_tree_maxindex(root->height);
+       unsigned long cur_index = first_index;
+       unsigned int ret = 0;
+
+       while (ret < max_items && cur_index < last_index) {
+               unsigned int nr_found;
+               unsigned long next_index;       /* Index of next search */
+
+               if (cur_index > max_index)
+                       break;
+               nr_found = __lookup(root, results + ret, cur_index,
+                                       max_items - ret, &next_index);
+               ret += nr_found;
+               if (next_index == 0)
+                       break;
+               cur_index = next_index;
+       }
+       return ret;
+}
+
+#ifdef RADIX_TREE_TAGS
+
+static unsigned int
+__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
+       unsigned int max_items, unsigned long *next_index, unsigned int tag)
+{
+       unsigned int nr_found = 0;
+       unsigned int shift;
+       unsigned int height = root->height;
+       struct radix_tree_node *slot;
+
+       shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+       slot = root->rnode;
+
+       while (height > 0) {
+               unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+               for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+                       if (tag_get(slot, tag, i)) {
+                               ASSERT(slot->slots[i] != NULL);
+                               break;
+                       }
+                       index &= ~((1UL << shift) - 1);
+                       index += 1UL << shift;
+                       if (index == 0)
+                               goto out;       /* 32-bit wraparound */
+               }
+               if (i == RADIX_TREE_MAP_SIZE)
+                       goto out;
+               height--;
+               if (height == 0) {      /* Bottom level: grab some items */
+                       unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+                       for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+                               index++;
+                               if (tag_get(slot, tag, j)) {
+                                       ASSERT(slot->slots[j] != NULL);
+                                       results[nr_found++] = slot->slots[j];
+                                       if (nr_found == max_items)
+                                               goto out;
+                               }
+                       }
+               }
+               shift -= RADIX_TREE_MAP_SHIFT;
+               slot = slot->slots[i];
+       }
+out:
+       *next_index = index;
+       return nr_found;
+}
+
+/**
+ *     radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
+ *                                  based on a tag
+ *     @root:          radix tree root
+ *     @results:       where the results of the lookup are placed
+ *     @first_index:   start the lookup from this key
+ *     @max_items:     place up to this many items at *results
+ *     @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ *     Performs an index-ascending scan of the tree for present items which
+ *     have the tag indexed by @tag set.  Places the items at *@results and
+ *     returns the number of items which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+               unsigned long first_index, unsigned int max_items,
+               unsigned int tag)
+{
+       const unsigned long max_index = radix_tree_maxindex(root->height);
+       unsigned long cur_index = first_index;
+       unsigned int ret = 0;
+
+       while (ret < max_items) {
+               unsigned int nr_found;
+               unsigned long next_index;       /* Index of next search */
+
+               if (cur_index > max_index)
+                       break;
+               nr_found = __lookup_tag(root, results + ret, cur_index,
+                                       max_items - ret, &next_index, tag);
+               ret += nr_found;
+               if (next_index == 0)
+                       break;
+               cur_index = next_index;
+       }
+       return ret;
+}
+
+#endif
+
+/**
+ *     radix_tree_shrink    -    shrink height of a radix tree to minimal
+ *     @root           radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+       /* try to shrink tree height */
+       while (root->height > 1 &&
+                       root->rnode->count == 1 &&
+                       root->rnode->slots[0]) {
+               struct radix_tree_node *to_free = root->rnode;
+
+               root->rnode = to_free->slots[0];
+               root->height--;
+               /* must only free zeroed nodes into the slab */
+#ifdef RADIX_TREE_TAGS
+               tag_clear(to_free, 0, 0);
+               tag_clear(to_free, 1, 0);
+#endif
+               to_free->slots[0] = NULL;
+               to_free->count = 0;
+               radix_tree_node_free(to_free);
+       }
+}
+
+/**
+ *     radix_tree_delete    -    delete an item from a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *
+ *     Remove the item at @index from the radix tree rooted at @root.
+ *
+ *     Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+       struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+       struct radix_tree_path *orig_pathp;
+       struct radix_tree_node *slot;
+       unsigned int height, shift;
+       void *ret = NULL;
+#ifdef RADIX_TREE_TAGS
+       char tags[RADIX_TREE_MAX_TAGS];
+       int nr_cleared_tags;
+       int tag;
+#endif
+       int offset;
+
+       height = root->height;
+       if (index > radix_tree_maxindex(height))
+               goto out;
+
+       shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+       pathp->node = NULL;
+       slot = root->rnode;
+
+       for ( ; height > 0; height--) {
+               if (slot == NULL)
+                       goto out;
+
+               pathp++;
+               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+               pathp->offset = offset;
+               pathp->node = slot;
+               slot = slot->slots[offset];
+               shift -= RADIX_TREE_MAP_SHIFT;
+       }
+
+       ret = slot;
+       if (ret == NULL)
+               goto out;
+
+       orig_pathp = pathp;
+
+#ifdef RADIX_TREE_TAGS
+       /*
+        * Clear all tags associated with the just-deleted item
+        */
+       nr_cleared_tags = 0;
+       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+               tags[tag] = 1;
+               if (tag_get(pathp->node, tag, pathp->offset)) {
+                       tag_clear(pathp->node, tag, pathp->offset);
+                       if (!any_tag_set(pathp->node, tag)) {
+                               tags[tag] = 0;
+                               nr_cleared_tags++;
+                       }
+               }
+       }
+
+       for (pathp--; nr_cleared_tags && pathp->node; pathp--) {
+               for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+                       if (tags[tag])
+                               continue;
+
+                       tag_clear(pathp->node, tag, pathp->offset);
+                       if (any_tag_set(pathp->node, tag)) {
+                               tags[tag] = 1;
+                               nr_cleared_tags--;
+                       }
+               }
+       }
+#endif
+       /* Now free the nodes we do not need anymore */
+       for (pathp = orig_pathp; pathp->node; pathp--) {
+               pathp->node->slots[pathp->offset] = NULL;
+               pathp->node->count--;
+
+               if (pathp->node->count) {
+                       if (pathp->node == root->rnode)
+                               radix_tree_shrink(root);
+                       goto out;
+               }
+
+               /* Node with zero slots in use so free it */
+               radix_tree_node_free(pathp->node);
+       }
+       root->rnode = NULL;
+       root->height = 0;
+out:
+       return ret;
+}
+
+#ifdef RADIX_TREE_TAGS
+/**
+ *     radix_tree_tagged - test whether any items in the tree are tagged
+ *     @root:          radix tree root
+ *     @tag:           tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+{
+       struct radix_tree_node *rnode;
+       rnode = root->rnode;
+       if (!rnode)
+               return 0;
+       return any_tag_set(rnode, tag);
+}
+#endif
+
+static unsigned long __maxindex(unsigned int height)
+{
+       unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
+       unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
+
+       if (tmp >= RADIX_TREE_INDEX_BITS)
+               index = ~0UL;
+       return index;
+}
+
+static void radix_tree_init_maxindex(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+               height_to_maxindex[i] = __maxindex(i);
+}
+
+void radix_tree_init(void)
+{
+       radix_tree_init_maxindex();
+}
diff --git a/repair/radix-tree.h b/repair/radix-tree.h
new file mode 100644 (file)
index 0000000..e16e08d
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __XFS_SUPPORT_RADIX_TREE_H__
+#define __XFS_SUPPORT_RADIX_TREE_H__
+
+#define RADIX_TREE_TAGS
+
+struct radix_tree_root {
+       unsigned int            height;
+       struct radix_tree_node  *rnode;
+};
+
+#define RADIX_TREE_INIT(mask)  {                                       \
+       .height = 0,                                                    \
+       .rnode = NULL,                                                  \
+}
+
+#define RADIX_TREE(name, mask) \
+       struct radix_tree_root name = RADIX_TREE_INIT(mask)
+
+#define INIT_RADIX_TREE(root, mask)                                    \
+do {                                                                   \
+       (root)->height = 0;                                             \
+       (root)->rnode = NULL;                                           \
+} while (0)
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_MAX_TAGS 2
+#endif
+
+int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup_first(struct radix_tree_root *, unsigned long *);
+void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned int max_items);
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned long last_index,
+                       unsigned int max_items);
+
+void radix_tree_init(void);
+
+#ifdef RADIX_TREE_TAGS
+void *radix_tree_tag_set(struct radix_tree_root *root,
+                       unsigned long index, unsigned int tag);
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+                       unsigned long index, unsigned int tag);
+int radix_tree_tag_get(struct radix_tree_root *root,
+                       unsigned long index, unsigned int tag);
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned int max_items,
+                       unsigned int tag);
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+#endif
+
+#endif /* __XFS_SUPPORT_RADIX_TREE_H__ */
index 7645128dee065445f47f173a6d963ac1ae539036..7036aa2b238a9aa4e6d8d4d910e5819c65b5060c 100644 (file)
@@ -91,7 +91,7 @@ generate_rtinfo(xfs_mount_t   *mp,
                bits = 0;
                for (i = 0; i < sizeof(xfs_rtword_t) * NBBY &&
                                extno < mp->m_sb.sb_rextents; i++, extno++)  {
-                       if (get_rtbmap(extno) == XR_E_FREE)  {
+                       if (get_rtbno_state(mp, extno) == XR_E_FREE)  {
                                sb_frextents++;
                                bits |= freebit;
 
@@ -218,7 +218,7 @@ process_rtbitmap(xfs_mount_t        *mp,
                     bit < bitsperblock && extno < mp->m_sb.sb_rextents;
                     bit++, extno++) {
                        if (xfs_isset(words, bit)) {
-                               set_rtbmap(extno, XR_E_FREE);
+                               set_rtbno_state(mp, extno, XR_E_FREE);
                                sb_frextents++;
                                if (prevbit == 0) {
                                        start_bmbno = bmbno;
index f2bf863073d1ef9ca514d1f115476f84afa037ae..18ac38513c81d45b2174dc74f7d7a13342ac21e0 100644 (file)
@@ -148,9 +148,6 @@ scanfunc_bmap(
        xfs_dfiloff_t           last_key;
        char                    *forkname;
        int                     numrecs;
-       xfs_agnumber_t          agno;
-       xfs_agblock_t           agbno;
-       int                     state;
 
        if (whichfork == XFS_DATA_FORK)
                forkname = _("data");
@@ -232,16 +229,11 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
                bm_cursor->level[level].right_fsbno =
                                        be64_to_cpu(block->bb_u.l.bb_rightsib);
 
-               agno = XFS_FSB_TO_AGNO(mp, bno);
-               agbno = XFS_FSB_TO_AGBNO(mp, bno);
-
-               pthread_mutex_lock(&ag_locks[agno]);
-               state = get_bmap(agno, agbno);
-               switch (state) {
+               switch (get_fsbno_state(mp, bno))  {
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
-                       set_bmap(agno, agbno, XR_E_INUSE);
+                       set_fsbno_state(mp, bno, XR_E_INUSE);
                        break;
                case XR_E_FS_MAP:
                case XR_E_INUSE:
@@ -253,17 +245,19 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
                         * we made it here, the block probably
                         * contains btree data.
                         */
-                       set_bmap(agno, agbno, XR_E_MULT);
+                       set_fsbno_state(mp, bno, XR_E_MULT);
                        do_warn(
                _("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
-                               ino, (__uint64_t) bno, state);
+                               ino, (__uint64_t) bno,
+                               get_fsbno_state(mp, bno));
                        break;
                case XR_E_MULT:
                case XR_E_INUSE_FS:
-                       set_bmap(agno, agbno, XR_E_MULT);
+                       set_fsbno_state(mp, bno, XR_E_MULT);
                        do_warn(
                _("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
-                               ino, (__uint64_t) bno, state);
+                               ino, (__uint64_t) bno,
+                               get_fsbno_state(mp, bno));
                        /*
                         * if we made it to here, this is probably a bmap block
                         * that is being used by *another* file as a bmap block
@@ -278,19 +272,18 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
                default:
                        do_warn(
                _("bad state %d, inode 0x%llx bmap block 0x%llx\n"),
-                               state, ino, (__uint64_t) bno);
+                               get_fsbno_state(mp, bno),
+                               ino, (__uint64_t) bno);
                        break;
                }
-               pthread_mutex_unlock(&ag_locks[agno]);
        } else  {
                /*
                 * attribute fork for realtime files is in the regular
                 * filesystem
                 */
                if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK)  {
-                       if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
-                                       XFS_FSB_TO_AGBNO(mp, bno),
-                                       XFS_FSB_TO_AGBNO(mp, bno) + 1))
+                       if (search_dup_extent(mp, XFS_FSB_TO_AGNO(mp, bno),
+                                       XFS_FSB_TO_AGBNO(mp, bno)))
                                return(1);
                } else  {
                        if (search_rt_dup_extent(mp, bno))
@@ -485,15 +478,19 @@ scanfunc_allocbt(
        /*
         * check for btree blocks multiply claimed
         */
-       state = get_bmap(agno, bno);
-       switch (state != XR_E_UNKNOWN)  {
-               set_bmap(agno, bno, XR_E_MULT);
+       state = get_agbno_state(mp, agno, bno);
+
+       switch (state)  {
+       case XR_E_UNKNOWN:
+               set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+               break;
+       default:
+               set_agbno_state(mp, agno, bno, XR_E_MULT);
                do_warn(
 _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                                name, state, agno, bno, suspect);
                return;
        }
-       set_bmap(agno, bno, XR_E_FS_MAP);
 
        numrecs = be16_to_cpu(block->bb_numrecs);
 
@@ -514,7 +511,7 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
                for (i = 0; i < numrecs; i++) {
                        xfs_agblock_t           b, end;
-                       xfs_extlen_t            len, blen;
+                       xfs_extlen_t            len;
 
                        b = be32_to_cpu(rp[i].ar_startblock);
                        len = be32_to_cpu(rp[i].ar_blockcount);
@@ -527,11 +524,12 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                        if (!verify_agbno(mp, agno, end - 1))
                                continue;
 
-                       for ( ; b < end; b += blen)  {
-                               state = get_bmap_ext(agno, b, end, &blen);
+                       for ( ; b < end; b++)  {
+                               state = get_agbno_state(mp, agno, b);
                                switch (state) {
                                case XR_E_UNKNOWN:
-                                       set_bmap(agno, b, XR_E_FREE1);
+                                       set_agbno_state(mp, agno, b,
+                                                       XR_E_FREE1);
                                        break;
                                case XR_E_FREE1:
                                        /*
@@ -539,15 +537,14 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                                         * FREE1 blocks later
                                         */
                                        if (magic == XFS_ABTC_MAGIC) {
-                                               set_bmap_ext(agno, b, blen,
-                                                            XR_E_FREE);
+                                               set_agbno_state(mp, agno, b,
+                                                               XR_E_FREE);
                                                break;
                                        }
                                default:
                                        do_warn(
-       _("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
-                                               agno, b, b + blen - 1,
-                                               name, state);
+       _("block (%d,%d) multiply claimed by %s space tree, state - %d\n"),
+                                               agno, b, name, state);
                                        break;
                                }
                        }
@@ -703,14 +700,13 @@ _("bad ending inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n"),
                     j < XFS_INODES_PER_CHUNK;
                     j += mp->m_sb.sb_inopblock)  {
                        agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
-
-                       state = get_bmap(agno, agbno);
+                       state = get_agbno_state(mp, agno, agbno);
                        if (state == XR_E_UNKNOWN)  {
-                               set_bmap(agno, agbno, XR_E_INO);
+                               set_agbno_state(mp, agno, agbno, XR_E_INO);
                        } else if (state == XR_E_INUSE_FS && agno == 0 &&
                                   ino + j >= first_prealloc_ino &&
                                   ino + j < last_prealloc_ino)  {
-                               set_bmap(agno, agbno, XR_E_INO);
+                               set_agbno_state(mp, agno, agbno, XR_E_INO);
                        } else  {
                                do_warn(
 _("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
@@ -847,15 +843,16 @@ scanfunc_ino(
         * check for btree blocks multiply claimed, any unknown/free state
         * is ok in the bitmap block.
         */
-       state = get_bmap(agno, bno);
+       state = get_agbno_state(mp, agno, bno);
+
        switch (state)  {
        case XR_E_UNKNOWN:
        case XR_E_FREE1:
        case XR_E_FREE:
-               set_bmap(agno, bno, XR_E_FS_MAP);
+               set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
                break;
        default:
-               set_bmap(agno, bno, XR_E_MULT);
+               set_agbno_state(mp, agno, bno, XR_E_MULT);
                do_warn(
 _("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
                        state, agno, bno, suspect);
@@ -957,7 +954,7 @@ scan_freelist(
        if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
            XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
            XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
-               set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+               set_agbno_state(mp, agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
 
        if (be32_to_cpu(agf->agf_flcount) == 0)
                return;
@@ -975,7 +972,7 @@ scan_freelist(
        for (;;) {
                bno = be32_to_cpu(agfl->agfl_bno[i]);
                if (verify_agbno(mp, agno, bno))
-                       set_bmap(agno, bno, XR_E_FREE);
+                       set_agbno_state(mp, agno, bno, XR_E_FREE);
                else
                        do_warn(_("bad agbno %u in agfl, agno %d\n"),
                                bno, agno);
index e36eeae9adfc78a46965235e499710c4e6ea088f..8bf20bbfde1e823d1981ad55a8e6642e8b7f167a 100644 (file)
@@ -39,6 +39,7 @@ extern void   phase4(xfs_mount_t *);
 extern void    phase5(xfs_mount_t *);
 extern void    phase6(xfs_mount_t *);
 extern void    phase7(xfs_mount_t *);
+extern void    incore_init(xfs_mount_t *);
 
 #define                XR_MAX_SECT_SIZE        (64 * 1024)
 
@@ -535,6 +536,11 @@ main(int argc, char **argv)
        bindtextdomain(PACKAGE, LOCALEDIR);
        textdomain(PACKAGE);
 
+#ifdef XR_PF_TRACE
+       pf_trace_file = fopen("/tmp/xfs_repair_prefetch.trace", "w");
+       setvbuf(pf_trace_file, NULL, _IOLBF, 1024);
+#endif
+
        temp_mp = &xfs_m;
        setbuf(stdout, NULL);
 
@@ -687,14 +693,9 @@ main(int argc, char **argv)
        calc_mkfs(mp);
 
        /*
-        * initialize block alloc map
+        * check sb filesystem stats and initialize in-core data structures
         */
-       init_bmaps(mp);
-       incore_ino_init(mp);
-       incore_ext_init(mp);
-
-       /* initialize random globals now that we know the fs geometry */
-       inodes_per_block = mp->m_sb.sb_inopblock;
+       incore_init(mp);
 
        if (parse_sb_version(&mp->m_sb))  {
                do_warn(
@@ -722,11 +723,6 @@ main(int argc, char **argv)
        }
        timestamp(PHASE_END, 5, NULL);
 
-       /*
-        * Done with the block usage maps, toss them...
-        */
-       free_bmaps(mp);
-
        if (!bad_ino_btree)  {
                phase6(mp);
                timestamp(PHASE_END, 6, NULL);
@@ -848,7 +844,8 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n"
        if (verbose)
                summary_report();
        do_log(_("done\n"));
-       pftrace_done();
-
+#ifdef XR_PF_TRACE
+       fclose(pf_trace_file);
+#endif
        return (0);
 }