#
PKG_MAJOR=3
PKG_MINOR=0
-PKG_REVISION=5
+PKG_REVISION=4
PKG_BUILD=1
-xfsprogs-3.0.5 (23 October 2009)
- - Use btrees in xfs_repair in a number of critical data
- structures, in place of bitmaps and radix trees, resulting
- in reduced memory and CPU requirements for large file
- systems.
- - Various other performance improvements in xfs_repair.
- - Add a new function lpath_to_handle() to libhandle, which
- allows symlinks to be handled more robustly.
- - Tweak the code so a single scanfunc_allocbt() can be used
- in place of the two nearly-identical functions used before.
- - Add support for discarding blocks to mkfs (along with a
- command-line option to avoid its use if desired).
- - Allow use of libblkid from util-linux if it is available,
- for determining device geometry.
- - A few configuration and build improvements.
-
xfsprogs-3.0.4 (17 September 2009)
- Fix a memory leak in xfsprogs.
- Increase hash chain length in xfsprogs when running out of memory.
struct parent;
extern int path_to_handle (char *__path, void **__hanp, size_t *__hlen);
-extern int lpath_to_handle (char *__fspath, char *__path,
- void **__hanp, size_t *__hlen);
extern int path_to_fshandle (char *__path, void **__fshanp, size_t *__fshlen);
extern int handle_to_fshandle (void *__hanp, size_t __hlen, void **__fshanp,
size_t *__fshlen);
char *path, /* input, path to convert */
void **hanp, /* output, pointer to data */
size_t *hlen) /* output, size of returned data */
-{
- return lpath_to_handle(path, path, hanp, hlen);
-}
-
-/* Like path_to_handle, but reliable for paths which are either dangling
- * symlinks or symlinks whose targets are not in XFS filesystems.
- */
-int
-lpath_to_handle(
- char *fspath, /* input, path in filesystem */
- char *path, /* input, path to convert */
- void **hanp, /* output, pointer to data */
- size_t *hlen) /* output, size of returned data */
{
int fd;
int result;
comarg_t obj;
- fd = open(fspath, O_RDONLY);
+ fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
obj.path = path;
- result = obj_to_handle(fspath, fd, XFS_IOC_PATH_TO_HANDLE,
+ result = obj_to_handle(path, fd, XFS_IOC_PATH_TO_HANDLE,
obj, hanp, hlen);
close(fd);
return result;
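
For context, a minimal sketch (not part of the patch) of how a libhandle caller uses path_to_handle() after this change; open_by_handle(), free_handle(), and the <xfs/handle.h> install path are assumed from the existing libhandle interface:

/* illustrative sketch only -- not part of this patch */
#include <xfs/handle.h>		/* assumed install path of the libhandle header */
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

static int
handle_example(char *path)
{
	void	*hanp;
	size_t	hlen;
	int	fd;

	if (path_to_handle(path, &hanp, &hlen) < 0)
		return -1;			/* e.g. path is not on an XFS filesystem */
	printf("handle for %s is %zu bytes\n", path, hlen);
	fd = open_by_handle(hanp, hlen, O_RDONLY);	/* reopen the file via its handle */
	if (fd >= 0)
		close(fd);
	free_handle(hanp, hlen);
	return 0;
}
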
LTCOMMAND = xfs_repair
-HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
- dinode.h dir.h dir2.h err_protos.h globals.h incore.h protos.h rt.h \
- progress.h scan.h versions.h prefetch.h threads.h
+HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
+ dir2.h err_protos.h globals.h incore.h protos.h rt.h \
+ progress.h scan.h versions.h prefetch.h radix-tree.h threads.h
-CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
- dino_chunks.c dinode.c dir.c dir2.c globals.c incore.c \
+CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
+ dinode.c dir.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
- progress.c prefetch.c rt.c sb.c scan.c threads.c \
+ progress.c prefetch.c radix-tree.c rt.c sb.c scan.c threads.c \
versions.c xfs_repair.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
#
# Tracing flags:
+# -DXR_BMAP_DBG incore block bitmap debugging
# -DXR_INODE_TRACE inode processing
+# -DXR_BMAP_TRACE bmap btree processing
# -DXR_DIR_TRACE directory processing
# -DXR_DUP_TRACE duplicate extent processing
# -DXR_BCNT_TRACE incore bcnt freespace btree building
/*
- * Copyright (c) 2000-2001,2005,2008 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
#include "bmap.h"
/*
- * Track the logical to physical block mapping for inodes.
- *
- * Repair only processes one inode at a given time per thread, and the
- * block map does not have to outlive the processing of a single inode.
- *
- * The combination of those factors means we can use pthreads thread-local
- * storage to store the block map, and we can re-use the allocation over
- * and over again.
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Append an extent to the block entry.
*/
+void
+blkent_append(
+ blkent_t **entp,
+ xfs_dfsbno_t b,
+ xfs_dfilblks_t c)
+{
+ blkent_t *ent;
+ size_t size;
+ int i;
-pthread_key_t dblkmap_key;
-pthread_key_t ablkmap_key;
+ ent = *entp;
+ size = BLKENT_SIZE(c + ent->nblks);
+ if ((*entp = ent = realloc(ent, size)) == NULL) {
+ do_warn(_("realloc failed in blkent_append (%u bytes)\n"),
+ size);
+ return;
+ }
+ for (i = 0; i < c; i++)
+ ent->blks[ent->nblks + i] = b + i;
+ ent->nblks += c;
+}
+
+/*
+ * Make a new block entry.
+ */
+blkent_t *
+blkent_new(
+ xfs_dfiloff_t o,
+ xfs_dfsbno_t b,
+ xfs_dfilblks_t c)
+{
+ blkent_t *ent;
+ int i;
+
+ if ((ent = malloc(BLKENT_SIZE(c))) == NULL) {
+ do_warn(_("malloc failed in blkent_new (%u bytes)\n"),
+ BLKENT_SIZE(c));
+ return ent;
+ }
+ ent->nblks = c;
+ ent->startoff = o;
+ for (i = 0; i < c; i++)
+ ent->blks[i] = b + i;
+ return ent;
+}
+/*
+ * Prepend an extent to the block entry.
+ */
+void
+blkent_prepend(
+ blkent_t **entp,
+ xfs_dfsbno_t b,
+ xfs_dfilblks_t c)
+{
+ int i;
+ blkent_t *newent;
+ blkent_t *oldent;
+
+ oldent = *entp;
+ if ((newent = malloc(BLKENT_SIZE(oldent->nblks + c))) == NULL) {
+ do_warn(_("malloc failed in blkent_prepend (%u bytes)\n"),
+ BLKENT_SIZE(oldent->nblks + c));
+ *entp = newent;
+ return;
+ }
+ newent->nblks = oldent->nblks + c;
+ newent->startoff = oldent->startoff - c;
+ for (i = 0; i < c; i++)
+		newent->blks[i] = b + i;
+ for (; i < oldent->nblks + c; i++)
+ newent->blks[i] = oldent->blks[i - c];
+ free(oldent);
+ *entp = newent;
+}
+
+/*
+ * Allocate a block map.
+ */
blkmap_t *
blkmap_alloc(
- xfs_extnum_t nex,
- int whichfork)
+ xfs_extnum_t nex)
{
- pthread_key_t key;
blkmap_t *blkmap;
- ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
-
if (nex < 1)
nex = 1;
-
- key = whichfork ? ablkmap_key : dblkmap_key;
- blkmap = pthread_getspecific(key);
- if (!blkmap || blkmap->naexts < nex) {
- blkmap = realloc(blkmap, BLKMAP_SIZE(nex));
- if (!blkmap) {
- do_warn(_("malloc failed in blkmap_alloc (%u bytes)\n"),
- BLKMAP_SIZE(nex));
- return NULL;
- }
- pthread_setspecific(key, blkmap);
- blkmap->naexts = nex;
+ if ((blkmap = malloc(BLKMAP_SIZE(nex))) == NULL) {
+ do_warn(_("malloc failed in blkmap_alloc (%u bytes)\n"),
+ BLKMAP_SIZE(nex));
+ return blkmap;
}
-
- blkmap->nexts = 0;
+ blkmap->naents = nex;
+ blkmap->nents = 0;
return blkmap;
}
blkmap_free(
blkmap_t *blkmap)
{
- /* nothing to do! - keep the memory around for the next inode */
+ blkent_t **entp;
+ xfs_extnum_t i;
+
+ if (blkmap == NULL)
+ return;
+ for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++)
+ free(*entp);
+ free(blkmap);
}
/*
blkmap_t *blkmap,
xfs_dfiloff_t o)
{
- bmap_ext_t *ext = blkmap->exts;
+ blkent_t *ent;
+ blkent_t **entp;
int i;
- for (i = 0; i < blkmap->nexts; i++, ext++) {
- if (o >= ext->startoff && o < ext->startoff + ext->blockcount)
- return ext->startblock + (o - ext->startoff);
+ for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++) {
+ ent = *entp;
+ if (o >= ent->startoff && o < ent->startoff + ent->nblks)
+ return ent->blks[o - ent->startoff];
}
return NULLDFSBNO;
}
/*
- * Get a chunk of entries from a block map - only used for reading dirv2 blocks
+ * Get a chunk of entries from a block map.
*/
int
blkmap_getn(
bmap_ext_t **bmpp,
bmap_ext_t *bmpp_single)
{
- bmap_ext_t *bmp = NULL;
- bmap_ext_t *ext;
+ bmap_ext_t *bmp;
+ blkent_t *ent;
+ xfs_dfiloff_t ento;
+ blkent_t **entp;
int i;
int nex;
if (nb == 1) {
- /*
+ /*
* in the common case, when mp->m_dirblkfsbs == 1,
* avoid additional malloc/free overhead
*/
bmpp_single->startblock = blkmap_get(blkmap, o);
- goto single_ext;
+ bmpp_single->blockcount = 1;
+ bmpp_single->startoff = 0;
+ bmpp_single->flag = 0;
+ *bmpp = bmpp_single;
+ return (bmpp_single->startblock != NULLDFSBNO) ? 1 : 0;
}
- ext = blkmap->exts;
- nex = 0;
- for (i = 0; i < blkmap->nexts; i++, ext++) {
-
- if (ext->startoff >= o + nb)
+ for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+ i < blkmap->nents;
+ i++, entp++) {
+ ent = *entp;
+ if (ent->startoff >= o + nb)
break;
- if (ext->startoff + ext->blockcount <= o)
+ if (ent->startoff + ent->nblks <= o)
continue;
-
- /*
- * if all the requested blocks are in one extent (also common),
- * use the bmpp_single option as well
- */
- if (!bmp && o >= ext->startoff &&
- o + nb <= ext->startoff + ext->blockcount) {
- bmpp_single->startblock =
- ext->startblock + (o - ext->startoff);
- goto single_ext;
+ for (ento = ent->startoff;
+ ento < ent->startoff + ent->nblks && ento < o + nb;
+ ento++) {
+ if (ento < o)
+ continue;
+ if (bmp &&
+ bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+ ento &&
+ bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+ ent->blks[ento - ent->startoff])
+ bmp[nex - 1].blockcount++;
+ else {
+ bmp = realloc(bmp, ++nex * sizeof(*bmp));
+ if (bmp == NULL) {
+ do_warn(_("blkmap_getn realloc failed"
+ " (%u bytes)\n"),
+ nex * sizeof(*bmp));
+ continue;
+ }
+ bmp[nex - 1].startoff = ento;
+ bmp[nex - 1].startblock =
+ ent->blks[ento - ent->startoff];
+ bmp[nex - 1].blockcount = 1;
+ bmp[nex - 1].flag = 0;
+ }
}
-
- /*
- * rare case - multiple extents for a single dir block
- */
- bmp = malloc(nb * sizeof(bmap_ext_t));
- if (!bmp)
- do_error(_("blkmap_getn malloc failed (%u bytes)\n"),
- nb * sizeof(bmap_ext_t));
-
- bmp[nex].startblock = ext->startblock + (o - ext->startoff);
- bmp[nex].blockcount = MIN(nb, ext->blockcount -
- (bmp[nex].startblock - ext->startblock));
- o += bmp[nex].blockcount;
- nb -= bmp[nex].blockcount;
- nex++;
}
*bmpp = bmp;
return nex;
+}
+
+/*
+ * Make a block map larger.
+ */
+void
+blkmap_grow(
+ blkmap_t **blkmapp,
+ blkent_t **entp,
+ blkent_t *newent)
+{
+ blkmap_t *blkmap;
+ size_t size;
+ int i;
+ int idx;
-single_ext:
- bmpp_single->blockcount = nb;
- bmpp_single->startoff = 0; /* not even used by caller! */
- *bmpp = bmpp_single;
- return (bmpp_single->startblock != NULLDFSBNO) ? 1 : 0;
+ blkmap = *blkmapp;
+ idx = (int)(entp - blkmap->ents);
+ if (blkmap->naents == blkmap->nents) {
+ size = BLKMAP_SIZE(blkmap->nents + 1);
+ if ((*blkmapp = blkmap = realloc(blkmap, size)) == NULL) {
+ do_warn(_("realloc failed in blkmap_grow (%u bytes)\n"),
+ size);
+ return;
+ }
+ blkmap->naents++;
+ }
+ for (i = blkmap->nents; i > idx; i--)
+ blkmap->ents[i] = blkmap->ents[i - 1];
+ blkmap->ents[idx] = newent;
+ blkmap->nents++;
}
/*
blkmap_last_off(
blkmap_t *blkmap)
{
- bmap_ext_t *ext;
+ blkent_t *ent;
- if (!blkmap->nexts)
+ if (!blkmap->nents)
return NULLDFILOFF;
- ext = blkmap->exts + blkmap->nexts - 1;
- return ext->startoff + ext->blockcount;
+ ent = blkmap->ents[blkmap->nents - 1];
+ return ent->startoff + ent->nblks;
}
/*
xfs_dfiloff_t o,
int *t)
{
- bmap_ext_t *ext;
+ blkent_t *ent;
+ blkent_t **entp;
- if (!blkmap->nexts)
+ if (!blkmap->nents)
return NULLDFILOFF;
if (o == NULLDFILOFF) {
*t = 0;
- return blkmap->exts[0].startoff;
+ ent = blkmap->ents[0];
+ return ent->startoff;
}
- ext = blkmap->exts + *t;
- if (o < ext->startoff + ext->blockcount - 1)
+ entp = &blkmap->ents[*t];
+ ent = *entp;
+ if (o < ent->startoff + ent->nblks - 1)
return o + 1;
- if (*t >= blkmap->nexts - 1)
+ entp++;
+ if (entp >= &blkmap->ents[blkmap->nents])
return NULLDFILOFF;
(*t)++;
- return ext[1].startoff;
+ ent = *entp;
+ return ent->startoff;
}
/*
- * Make a block map larger.
+ * Set a block value in a block map.
*/
-static blkmap_t *
-blkmap_grow(
- blkmap_t **blkmapp)
+void
+blkmap_set_blk(
+ blkmap_t **blkmapp,
+ xfs_dfiloff_t o,
+ xfs_dfsbno_t b)
{
- pthread_key_t key = dblkmap_key;
- blkmap_t *blkmap = *blkmapp;
+ blkmap_t *blkmap;
+ blkent_t *ent;
+ blkent_t **entp;
+ blkent_t *nextent;
- if (pthread_getspecific(key) != blkmap) {
- key = ablkmap_key;
- ASSERT(pthread_getspecific(key) == blkmap);
+ blkmap = *blkmapp;
+ for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+ ent = *entp;
+ if (o < ent->startoff - 1) {
+ ent = blkent_new(o, b, 1);
+ blkmap_grow(blkmapp, entp, ent);
+ return;
+ }
+ if (o == ent->startoff - 1) {
+ blkent_prepend(entp, b, 1);
+ return;
+ }
+ if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+ ent->blks[o - ent->startoff] = b;
+ return;
+ }
+ if (o > ent->startoff + ent->nblks)
+ continue;
+ blkent_append(entp, b, 1);
+ if (entp == &blkmap->ents[blkmap->nents - 1])
+ return;
+ ent = *entp;
+ nextent = entp[1];
+ if (ent->startoff + ent->nblks < nextent->startoff)
+ return;
+ blkent_append(entp, nextent->blks[0], nextent->nblks);
+ blkmap_shrink(blkmap, &entp[1]);
+ return;
}
-
- blkmap->naexts += 4;
- blkmap = realloc(blkmap, BLKMAP_SIZE(blkmap->naexts));
- if (blkmap == NULL)
- do_error(_("realloc failed in blkmap_grow\n"));
- *blkmapp = blkmap;
- pthread_setspecific(key, blkmap);
- return blkmap;
+ ent = blkent_new(o, b, 1);
+ blkmap_grow(blkmapp, entp, ent);
}
/*
xfs_dfsbno_t b,
xfs_dfilblks_t c)
{
- blkmap_t *blkmap = *blkmapp;
+ blkmap_t *blkmap;
+ blkent_t *ent;
+ blkent_t **entp;
xfs_extnum_t i;
- if (blkmap->nexts == blkmap->naexts)
- blkmap = blkmap_grow(blkmapp);
-
- for (i = 0; i < blkmap->nexts; i++) {
- if (blkmap->exts[i].startoff > o) {
- memmove(blkmap->exts + i + 1,
- blkmap->exts + i,
- sizeof(bmap_ext_t) * (blkmap->nexts - i));
- break;
- }
+ blkmap = *blkmapp;
+ if (!blkmap->nents) {
+ blkmap->ents[0] = blkent_new(o, b, c);
+ blkmap->nents = 1;
+ return;
+ }
+ entp = &blkmap->ents[blkmap->nents - 1];
+ ent = *entp;
+ if (ent->startoff + ent->nblks == o) {
+ blkent_append(entp, b, c);
+ return;
+ }
+ if (ent->startoff + ent->nblks < o) {
+ ent = blkent_new(o, b, c);
+ blkmap_grow(blkmapp, &blkmap->ents[blkmap->nents], ent);
+ return;
}
+ for (i = 0; i < c; i++)
+ blkmap_set_blk(blkmapp, o + i, b + i);
+}
+
+/*
+ * Make a block map smaller.
+ */
+void
+blkmap_shrink(
+ blkmap_t *blkmap,
+ blkent_t **entp)
+{
+ int i;
+ int idx;
- blkmap->exts[i].startoff = o;
- blkmap->exts[i].startblock = b;
- blkmap->exts[i].blockcount = c;
- blkmap->nexts++;
+ free(*entp);
+ idx = (int)(entp - blkmap->ents);
+ for (i = idx + 1; i < blkmap->nents; i++)
+		blkmap->ents[i - 1] = blkmap->ents[i];
+ blkmap->nents--;
}
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef _XFS_REPAIR_BMAP_H
-#define _XFS_REPAIR_BMAP_H
+/*
+ * Block mapping code taken from xfs_db.
+ */
/*
- * Extent descriptor.
+ * Block map entry.
*/
-typedef struct bmap_ext {
+typedef struct blkent {
xfs_dfiloff_t startoff;
- xfs_dfsbno_t startblock;
- xfs_dfilblks_t blockcount;
-} bmap_ext_t;
+ xfs_dfilblks_t nblks;
+ xfs_dfsbno_t blks[1];
+} blkent_t;
+#define BLKENT_SIZE(n) \
+ (offsetof(blkent_t, blks) + (sizeof(xfs_dfsbno_t) * (n)))
/*
* Block map.
*/
typedef struct blkmap {
- int naexts;
- int nexts;
- bmap_ext_t exts[1];
+ int naents;
+ int nents;
+ blkent_t *ents[1];
} blkmap_t;
-
#define BLKMAP_SIZE(n) \
- (offsetof(blkmap_t, exts) + (sizeof(bmap_ext_t) * (n)))
-
-blkmap_t *blkmap_alloc(xfs_extnum_t nex, int whichfork);
-void blkmap_free(blkmap_t *blkmap);
+ (offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
-void blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
- xfs_dfsbno_t b, xfs_dfilblks_t c);
+/*
+ * Extent descriptor.
+ */
+typedef struct bmap_ext {
+ xfs_dfiloff_t startoff;
+ xfs_dfsbno_t startblock;
+ xfs_dfilblks_t blockcount;
+ int flag;
+} bmap_ext_t;
+void blkent_append(blkent_t **entp, xfs_dfsbno_t b,
+ xfs_dfilblks_t c);
+blkent_t *blkent_new(xfs_dfiloff_t o, xfs_dfsbno_t b, xfs_dfilblks_t c);
+void blkent_prepend(blkent_t **entp, xfs_dfsbno_t b,
+ xfs_dfilblks_t c);
+blkmap_t *blkmap_alloc(xfs_extnum_t);
+void blkmap_free(blkmap_t *blkmap);
xfs_dfsbno_t blkmap_get(blkmap_t *blkmap, xfs_dfiloff_t o);
int blkmap_getn(blkmap_t *blkmap, xfs_dfiloff_t o,
- xfs_dfilblks_t nb, bmap_ext_t **bmpp,
+ xfs_dfilblks_t nb, bmap_ext_t **bmpp,
bmap_ext_t *bmpp_single);
+void blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+ blkent_t *newent);
xfs_dfiloff_t blkmap_last_off(blkmap_t *blkmap);
xfs_dfiloff_t blkmap_next_off(blkmap_t *blkmap, xfs_dfiloff_t o, int *t);
-
-#endif /* _XFS_REPAIR_BMAP_H */
+void blkmap_set_blk(blkmap_t **blkmapp, xfs_dfiloff_t o,
+ xfs_dfsbno_t b);
+void blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
+ xfs_dfsbno_t b, xfs_dfilblks_t c);
+void blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
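
For reference, a minimal sketch (not part of the patch) of how the block map interface restored above is driven; the extent numbers are invented for illustration and the sketch assumes xfs_repair's internal headers:

/* illustrative sketch only -- assumes xfs_repair's internal headers */
#include <libxfs.h>
#include "bmap.h"

static void
blkmap_example(void)
{
	blkmap_t	*blkmap;
	xfs_dfsbno_t	bno;

	blkmap = blkmap_alloc(2);			/* room for two entries */
	if (blkmap == NULL)
		return;
	blkmap_set_ext(&blkmap, 0, 1000, 8);		/* file offsets 0-7   -> fsblocks 1000-1007 */
	blkmap_set_ext(&blkmap, 16, 2000, 4);		/* file offsets 16-19 -> fsblocks 2000-2003 */
	bno = blkmap_get(blkmap, 3);			/* 1003 */
	bno = blkmap_get(blkmap, 9);			/* NULLDFSBNO: offset 9 is a hole */
	(void)bno;
	blkmap_free(blkmap);
}
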
+++ /dev/null
-/*
- * Copyright (c) 2007, Silicon Graphics, Inc. Barry Naujok <bnaujok@sgi.com>
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <libxfs.h>
-#include "btree.h"
-
-
-#define BTREE_KEY_MAX 7
-#define BTREE_KEY_MIN (BTREE_KEY_MAX / 2)
-
-#define BTREE_PTR_MAX (BTREE_KEY_MAX + 1)
-
-struct btree_node {
- unsigned long num_keys;
- unsigned long keys[BTREE_KEY_MAX];
- struct btree_node * ptrs[BTREE_PTR_MAX];
-};
-
-struct btree_cursor {
- struct btree_node *node;
- int index;
-};
-
-struct btree_root {
- struct btree_node *root_node;
- struct btree_cursor *cursor; /* track path to end leaf */
- int height;
- /* lookup cache */
- int keys_valid; /* set if the cache is valid */
- unsigned long cur_key;
- unsigned long next_key;
- void *next_value;
- unsigned long prev_key;
- void *prev_value;
-#ifdef BTREE_STATS
- struct btree_stats {
- unsigned long num_items;
- unsigned long max_items;
- int alloced;
- int cache_hits;
- int cache_misses;
- int lookup;
- int find;
- int key_update;
- int value_update;
- int insert;
- int delete;
- int inc_height;
- int dec_height;
- int shift_prev;
- int shift_next;
- int split;
- int merge_prev;
- int merge_next;
- int balance_prev;
- int balance_next;
- } stats;
-#endif
-};
-
-
-static struct btree_node *
-btree_node_alloc(void)
-{
- return calloc(1, sizeof(struct btree_node));
-}
-
-static void
-btree_node_free(
- struct btree_node *node)
-{
- free(node);
-}
-
-static void
-btree_free_nodes(
- struct btree_node *node,
- int level)
-{
- int i;
-
- if (level)
- for (i = 0; i <= node->num_keys; i++)
- btree_free_nodes(node->ptrs[i], level - 1);
- btree_node_free(node);
-}
-
-static void
-__btree_init(
- struct btree_root *root)
-{
- memset(root, 0, sizeof(struct btree_root));
- root->height = 1;
- root->cursor = calloc(1, sizeof(struct btree_cursor));
- root->root_node = btree_node_alloc();
- ASSERT(root->root_node);
-#ifdef BTREE_STATS
- root->stats.max_items = 1;
- root->stats.alloced += 1;
-#endif
-}
-
-static void
-__btree_free(
- struct btree_root *root)
-{
- btree_free_nodes(root->root_node, root->height - 1);
- free(root->cursor);
- root->height = 0;
- root->cursor = NULL;
- root->root_node = NULL;
-}
-
-void
-btree_init(
- struct btree_root **root)
-{
- *root = calloc(1, sizeof(struct btree_root));
- __btree_init(*root);
-}
-
-void
-btree_clear(
- struct btree_root *root)
-{
- __btree_free(root);
- __btree_init(root);
-}
-
-void
-btree_destroy(
- struct btree_root *root)
-{
- __btree_free(root);
- free(root);
-}
-
-int
-btree_is_empty(
- struct btree_root *root)
-{
- return root->root_node->num_keys == 0;
-}
-
-static inline void
-btree_invalidate_cursor(
- struct btree_root *root)
-{
- root->cursor[0].node = NULL;
- root->keys_valid = 0;
-}
-
-static inline unsigned long
-btree_key_of_cursor(
- struct btree_cursor *cursor,
- int height)
-{
- while (cursor->node->num_keys == cursor->index && --height > 0)
- cursor++;
- return cursor->node->keys[cursor->index];
-}
-
-static void *
-btree_get_prev(
- struct btree_root *root,
- unsigned long *key)
-{
- struct btree_cursor *cur = root->cursor;
- int level = 0;
- struct btree_node *node;
-
- if (cur->index > 0) {
- if (key)
- *key = cur->node->keys[cur->index - 1];
- return cur->node->ptrs[cur->index - 1];
- }
-
- /* else need to go up and back down the tree to find the previous */
-
- while (cur->index == 0) {
- if (++level == root->height)
- return NULL;
- cur++;
- }
-
- /* the key is in the current level */
- if (key)
- *key = cur->node->keys[cur->index - 1];
-
- /* descend back down the right side to get the pointer */
- node = cur->node->ptrs[cur->index - 1];
- while (level--)
- node = node->ptrs[node->num_keys];
- return node;
-}
-
-static void *
-btree_get_next(
- struct btree_root *root,
- unsigned long *key)
-{
- struct btree_cursor *cur = root->cursor;
- int level = 0;
- struct btree_node *node;
-
- while (cur->index == cur->node->num_keys) {
- if (++level == root->height)
- return NULL;
- cur++;
- }
- if (level == 0) {
- if (key) {
- cur->index++;
- *key = btree_key_of_cursor(cur, root->height);
- cur->index--;
- }
- return cur->node->ptrs[cur->index + 1];
- }
-
- node = cur->node->ptrs[cur->index + 1];
- while (--level > 0)
- node = node->ptrs[0];
- if (key)
- *key = node->keys[0];
- return node->ptrs[0];
-}
-
-/*
- * Lookup/Search functions
- */
-
-static int
-btree_do_search(
- struct btree_root *root,
- unsigned long key)
-{
- unsigned long k = 0;
- struct btree_cursor *cur = root->cursor + root->height;
- struct btree_node *node = root->root_node;
- int height = root->height;
- int key_found = 0;
- int i;
-
- while (--height >= 0) {
- cur--;
- for (i = 0; i < node->num_keys; i++)
- if (node->keys[i] >= key) {
- k = node->keys[i];
- key_found = 1;
- break;
- }
- cur->node = node;
- cur->index = i;
- node = node->ptrs[i];
- }
- root->keys_valid = key_found;
- if (!key_found)
- return 0;
-
- root->cur_key = k;
- root->next_value = NULL; /* do on-demand next value lookup */
- root->prev_value = btree_get_prev(root, &root->prev_key);
- return 1;
-}
-
-static int
-btree_search(
- struct btree_root *root,
- unsigned long key)
-{
- if (root->keys_valid && key <= root->cur_key &&
- (!root->prev_value || key > root->prev_key)) {
-#ifdef BTREE_STATS
- root->stats.cache_hits++;
-#endif
- return 1;
- }
-#ifdef BTREE_STATS
- root->stats.cache_misses++;
-#endif
- return btree_do_search(root, key);
-}
-
-void *
-btree_find(
- struct btree_root *root,
- unsigned long key,
- unsigned long *actual_key)
-{
-#ifdef BTREE_STATS
- root->stats.find += 1;
-#endif
- if (!btree_search(root, key))
- return NULL;
-
- if (actual_key)
- *actual_key = root->cur_key;
- return root->cursor->node->ptrs[root->cursor->index];
-}
-
-void *
-btree_lookup(
- struct btree_root *root,
- unsigned long key)
-{
-#ifdef BTREE_STATS
- root->stats.lookup += 1;
-#endif
- if (!btree_search(root, key) || root->cur_key != key)
- return NULL;
- return root->cursor->node->ptrs[root->cursor->index];
-}
-
-void *
-btree_peek_prev(
- struct btree_root *root,
- unsigned long *key)
-{
- if (!root->keys_valid)
- return NULL;
- if (key)
- *key = root->prev_key;
- return root->prev_value;
-}
-
-void *
-btree_peek_next(
- struct btree_root *root,
- unsigned long *key)
-{
- if (!root->keys_valid)
- return NULL;
- if (!root->next_value)
- root->next_value = btree_get_next(root, &root->next_key);
- if (key)
- *key = root->next_key;
- return root->next_value;
-}
-
-static void *
-btree_move_cursor_to_next(
- struct btree_root *root,
- unsigned long *key)
-{
- struct btree_cursor *cur = root->cursor;
- int level = 0;
-
- while (cur->index == cur->node->num_keys) {
- if (++level == root->height)
- return NULL;
- cur++;
- }
- cur->index++;
- if (level == 0) {
- if (key)
- *key = btree_key_of_cursor(cur, root->height);
- return cur->node->ptrs[cur->index];
- }
-
- while (--level >= 0) {
- root->cursor[level].node = cur->node->ptrs[cur->index];
- root->cursor[level].index = 0;
- cur--;
- }
- if (key)
- *key = cur->node->keys[0];
- return cur->node->ptrs[0];
-}
-
-void *
-btree_lookup_next(
- struct btree_root *root,
- unsigned long *key)
-{
- void *value;
-
- if (!root->keys_valid)
- return NULL;
-
- root->prev_key = root->cur_key;
- root->prev_value = root->cursor->node->ptrs[root->cursor->index];
-
- value = btree_move_cursor_to_next(root, &root->cur_key);
- if (!value) {
- btree_invalidate_cursor(root);
- return NULL;
- }
- root->next_value = NULL; /* on-demand next value fetch */
- if (key)
- *key = root->cur_key;
- return value;
-}
-
-static void *
-btree_move_cursor_to_prev(
- struct btree_root *root,
- unsigned long *key)
-{
- struct btree_cursor *cur = root->cursor;
- int level = 0;
-
- while (cur->index == 0) {
- if (++level == root->height)
- return NULL;
- cur++;
- }
- cur->index--;
- if (key) /* the key is in the current level */
- *key = cur->node->keys[cur->index];
- while (level > 0) {
- level--;
- root->cursor[level].node = cur->node->ptrs[cur->index];
- root->cursor[level].index = root->cursor[level].node->num_keys;
- cur--;
- }
- return cur->node->ptrs[cur->index];
-}
-
-void *
-btree_lookup_prev(
- struct btree_root *root,
- unsigned long *key)
-{
- void *value;
-
- if (!root->keys_valid)
- return NULL;
-
- value = btree_move_cursor_to_prev(root, &root->cur_key);
- if (!value)
- return NULL;
- root->prev_value = btree_get_prev(root, &root->prev_key);
- root->next_value = NULL; /* on-demand next value fetch */
- if (key)
- *key = root->cur_key;
- return value;
-}
-
-void *
-btree_uncached_lookup(
- struct btree_root *root,
- unsigned long key)
-{
- /* cursor-less (ie. uncached) lookup */
- int height = root->height - 1;
- struct btree_node *node = root->root_node;
- int i;
- int key_found = 0;
-
- while (height >= 0) {
- for (i = 0; i < node->num_keys; i++)
- if (node->keys[i] >= key) {
- key_found = node->keys[i] == key;
- break;
- }
- node = node->ptrs[i];
- height--;
- }
- return key_found ? node : NULL;
-}
-
-/* Update functions */
-
-static inline void
-btree_update_node_key(
- struct btree_root *root,
- struct btree_cursor *cursor,
- int level,
- unsigned long new_key)
-{
- int i;
-
-#ifdef BTREE_STATS
- root->stats.key_update += 1;
-#endif
-
- cursor += level;
- for (i = level; i < root->height; i++) {
- if (cursor->index < cursor->node->num_keys) {
- cursor->node->keys[cursor->index] = new_key;
- break;
- }
- cursor++;
- }
-}
-
-int
-btree_update_key(
- struct btree_root *root,
- unsigned long old_key,
- unsigned long new_key)
-{
- if (!btree_search(root, old_key) || root->cur_key != old_key)
- return ENOENT;
-
- if (root->next_value && new_key >= root->next_key)
- return EINVAL;
-
- if (root->prev_value && new_key <= root->prev_key)
- return EINVAL;
-
- btree_update_node_key(root, root->cursor, 0, new_key);
-
- return 0;
-}
-
-int
-btree_update_value(
- struct btree_root *root,
- unsigned long key,
- void *new_value)
-{
- if (!new_value)
- return EINVAL;
-
- if (!btree_search(root, key) || root->cur_key != key)
- return ENOENT;
-
-#ifdef BTREE_STATS
- root->stats.value_update += 1;
-#endif
- root->cursor->node->ptrs[root->cursor->index] = new_value;
-
- return 0;
-}
-
-/*
- * Cursor modification functions - used for inserting and deleting
- */
-
-static struct btree_cursor *
-btree_copy_cursor_prev(
- struct btree_root *root,
- struct btree_cursor *dest_cursor,
- int level)
-{
- struct btree_cursor *src_cur = root->cursor + level;
- struct btree_cursor *dst_cur;
- int l = level;
- int i;
-
- if (level >= root->height)
- return NULL;
-
- while (src_cur->index == 0) {
- if (++l >= root->height)
- return NULL;
- src_cur++;
- }
- for (i = l; i < root->height; i++)
- dest_cursor[i] = *src_cur++;
-
- dst_cur = dest_cursor + l;
- dst_cur->index--;
- while (l-- >= level) {
- dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
- dest_cursor[l].index = dest_cursor[l].node->num_keys;
- dst_cur--;
- }
- return dest_cursor;
-}
-
-static struct btree_cursor *
-btree_copy_cursor_next(
- struct btree_root *root,
- struct btree_cursor *dest_cursor,
- int level)
-{
- struct btree_cursor *src_cur = root->cursor + level;
- struct btree_cursor *dst_cur;
- int l = level;
- int i;
-
- if (level >= root->height)
- return NULL;
-
- while (src_cur->index == src_cur->node->num_keys) {
- if (++l >= root->height)
- return NULL;
- src_cur++;
- }
- for (i = l; i < root->height; i++)
- dest_cursor[i] = *src_cur++;
-
- dst_cur = dest_cursor + l;
- dst_cur->index++;
- while (l-- >= level) {
- dest_cursor[l].node = dst_cur->node->ptrs[dst_cur->index];
- dest_cursor[l].index = 0;
- dst_cur--;
- }
- return dest_cursor;
-}
-
-/*
- * Shift functions
- *
- * Tries to move items in the current leaf to its sibling if it has space.
- * Used in both insert and delete functions.
- * Returns the number of items shifted.
- */
-
-static int
-btree_shift_to_prev(
- struct btree_root *root,
- int level,
- struct btree_cursor *prev_cursor,
- int num_children)
-{
- struct btree_node *node;
- struct btree_node *prev_node;
- int num_remain; /* # of keys left in "node" */
- unsigned long key;
- int i;
-
- if (!prev_cursor || !num_children)
- return 0;
-
- prev_node = prev_cursor[level].node;
- node = root->cursor[level].node;
-
- ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
-
- if ((prev_node->num_keys + num_children) > BTREE_KEY_MAX)
- return 0;
-
-#ifdef BTREE_STATS
- root->stats.shift_prev += 1;
-#endif
-
- num_remain = node->num_keys - num_children;
- ASSERT(num_remain == -1 || num_remain >= BTREE_KEY_MIN);
-
- /* shift parent keys around */
- level++;
- if (num_remain > 0)
- key = node->keys[num_children - 1];
- else
- key = btree_key_of_cursor(root->cursor + level,
- root->height - level);
- while (prev_cursor[level].index == prev_cursor[level].node->num_keys) {
- level++;
- ASSERT(level < root->height);
- }
- prev_node->keys[prev_node->num_keys] =
- prev_cursor[level].node->keys[prev_cursor[level].index];
- prev_cursor[level].node->keys[prev_cursor[level].index] = key;
-
- /* copy pointers and keys to the end of the prev node */
- for (i = 0; i < num_children - 1; i++) {
- prev_node->keys[prev_node->num_keys + 1 + i] = node->keys[i];
- prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
- }
- prev_node->ptrs[prev_node->num_keys + 1 + i] = node->ptrs[i];
- prev_node->num_keys += num_children;
-
- /* move remaining pointers/keys to start of node */
- if (num_remain >= 0) {
- for (i = 0; i < num_remain; i++) {
- node->keys[i] = node->keys[num_children + i];
- node->ptrs[i] = node->ptrs[num_children + i];
- }
- node->ptrs[i] = node->ptrs[num_children + i];
- node->num_keys = num_remain;
- } else
- node->num_keys = 0;
-
- return num_children;
-}
-
-static int
-btree_shift_to_next(
- struct btree_root *root,
- int level,
- struct btree_cursor *next_cursor,
- int num_children)
-{
- struct btree_node *node;
- struct btree_node *next_node;
- int num_remain; /* # of children left in node */
- int i;
-
- if (!next_cursor || !num_children)
- return 0;
-
- node = root->cursor[level].node;
- next_node = next_cursor[level].node;
-
- ASSERT(num_children > 0 && num_children <= node->num_keys + 1);
-
- if ((next_node->num_keys + num_children) > BTREE_KEY_MAX)
- return 0;
-
- num_remain = node->num_keys + 1 - num_children;
- ASSERT(num_remain == 0 || num_remain > BTREE_KEY_MIN);
-
-#ifdef BTREE_STATS
- root->stats.shift_next += 1;
-#endif
-
- /* make space for "num_children" items at beginning of next-leaf */
- i = next_node->num_keys;
- next_node->ptrs[num_children + i] = next_node->ptrs[i];
- while (--i >= 0) {
- next_node->keys[num_children + i] = next_node->keys[i];
- next_node->ptrs[num_children + i] = next_node->ptrs[i];
- }
-
- /* update keys in parent and next node from parent */
- do {
- level++;
- ASSERT(level < root->height);
- } while (root->cursor[level].index == root->cursor[level].node->num_keys);
-
- next_node->keys[num_children - 1] =
- root->cursor[level].node->keys[root->cursor[level].index];
- root->cursor[level].node->keys[root->cursor[level].index] =
- node->keys[node->num_keys - num_children];
-
- /* copy last "num_children" items from node into start of next-node */
- for (i = 0; i < num_children - 1; i++) {
- next_node->keys[i] = node->keys[num_remain + i];
- next_node->ptrs[i] = node->ptrs[num_remain + i];
- }
- next_node->ptrs[i] = node->ptrs[num_remain + i];
- next_node->num_keys += num_children;
-
- if (num_remain > 0)
- node->num_keys -= num_children;
- else
- node->num_keys = 0;
-
- return num_children;
-}
-
-/*
- * Insertion functions
- */
-
-static struct btree_node *
-btree_increase_height(
- struct btree_root *root)
-{
- struct btree_node *new_root;
- struct btree_cursor *new_cursor;
-
- new_cursor = realloc(root->cursor, (root->height + 1) *
- sizeof(struct btree_cursor));
- if (!new_cursor)
- return NULL;
- root->cursor = new_cursor;
-
- new_root = btree_node_alloc();
- if (!new_root)
- return NULL;
-
-#ifdef BTREE_STATS
- root->stats.alloced += 1;
- root->stats.inc_height += 1;
- root->stats.max_items *= BTREE_PTR_MAX;
-#endif
-
- new_root->ptrs[0] = root->root_node;
- root->root_node = new_root;
-
- root->cursor[root->height].node = new_root;
- root->cursor[root->height].index = 0;
-
- root->height++;
-
- return new_root;
-}
-
-static int
-btree_insert_item(
- struct btree_root *root,
- int level,
- unsigned long key,
- void *value);
-
-
-static struct btree_node *
-btree_split(
- struct btree_root *root,
- int level,
- unsigned long key,
- int *index)
-{
- struct btree_node *node = root->cursor[level].node;
- struct btree_node *new_node;
- int i;
-
- new_node = btree_node_alloc();
- if (!new_node)
- return NULL;
-
- if (btree_insert_item(root, level + 1, node->keys[BTREE_KEY_MIN],
- new_node) != 0) {
- btree_node_free(new_node);
- return NULL;
- }
-
-#ifdef BTREE_STATS
- root->stats.alloced += 1;
- root->stats.split += 1;
-#endif
-
- for (i = 0; i < BTREE_KEY_MAX - BTREE_KEY_MIN - 1; i++) {
- new_node->keys[i] = node->keys[BTREE_KEY_MIN + 1 + i];
- new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
- }
- new_node->ptrs[i] = node->ptrs[BTREE_KEY_MIN + 1 + i];
- new_node->num_keys = BTREE_KEY_MAX - BTREE_KEY_MIN - 1;
-
- node->num_keys = BTREE_KEY_MIN;
- if (key < node->keys[BTREE_KEY_MIN])
- return node; /* index doesn't change */
-
- /* insertion point is in new node... */
- *index -= BTREE_KEY_MIN + 1;
- return new_node;
-}
-
-static int
-btree_insert_shift_to_prev(
- struct btree_root *root,
- int level,
- int *index)
-{
- struct btree_cursor tmp_cursor[root->height];
- int n;
-
- if (*index <= 0)
- return -1;
-
- if (!btree_copy_cursor_prev(root, tmp_cursor, level + 1))
- return -1;
-
- n = MIN(*index, (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
- if (!n || !btree_shift_to_prev(root, level, tmp_cursor, n))
- return -1;
-
- *index -= n;
- return 0;
-}
-
-static int
-btree_insert_shift_to_next(
- struct btree_root *root,
- int level,
- int *index)
-{
- struct btree_cursor tmp_cursor[root->height];
- int n;
-
- if (*index >= BTREE_KEY_MAX)
- return -1;
-
- if (!btree_copy_cursor_next(root, tmp_cursor, level + 1))
- return -1;
-
- n = MIN(BTREE_KEY_MAX - *index,
- (BTREE_PTR_MAX - tmp_cursor[level].node->num_keys) / 2);
- if (!n || !btree_shift_to_next(root, level, tmp_cursor, n))
- return -1;
- return 0;
-}
-
-static int
-btree_insert_item(
- struct btree_root *root,
- int level,
- unsigned long key,
- void *value)
-{
- struct btree_node *node = root->cursor[level].node;
- int index = root->cursor[level].index;
- int i;
-
- if (node->num_keys == BTREE_KEY_MAX) {
- if (btree_insert_shift_to_prev(root, level, &index) == 0)
- goto insert;
- if (btree_insert_shift_to_next(root, level, &index) == 0)
- goto insert;
- if (level == root->height - 1) {
- if (!btree_increase_height(root))
- return ENOMEM;
- }
- node = btree_split(root, level, key, &index);
- if (!node)
- return ENOMEM;
- }
-insert:
- ASSERT(index <= node->num_keys);
-
- i = node->num_keys;
- node->ptrs[i + 1] = node->ptrs[i];
- while (--i >= index) {
- node->keys[i + 1] = node->keys[i];
- node->ptrs[i + 1] = node->ptrs[i];
- }
-
- node->num_keys++;
- node->keys[index] = key;
-
- if (level == 0)
- node->ptrs[index] = value;
- else
- node->ptrs[index + 1] = value;
-
- return 0;
-}
-
-
-
-int
-btree_insert(
- struct btree_root *root,
- unsigned long key,
- void *value)
-{
- int result;
-
- if (!value)
- return EINVAL;
-
- if (btree_search(root, key) && root->cur_key == key)
- return EEXIST;
-
-#ifdef BTREE_STATS
- root->stats.insert += 1;
- root->stats.num_items += 1;
-#endif
-
- result = btree_insert_item(root, 0, key, value);
-
- btree_invalidate_cursor(root);
-
- return result;
-}
-
-
-/*
- * Deletion functions
- *
- * Rather more complicated as deletions has 4 ways to go once a node
- * ends up with less than the minimum number of keys:
- * - move remainder to previous node
- * - move remainder to next node
- * (both will involve a parent deletion which may recurse)
- * - balance by moving some items from previous node
- * - balance by moving some items from next node
- */
-
-static void
-btree_decrease_height(
- struct btree_root *root)
-{
- struct btree_node *old_root = root->root_node;
-
- ASSERT(old_root->num_keys == 0);
-
-#ifdef BTREE_STATS
- root->stats.alloced -= 1;
- root->stats.dec_height += 1;
- root->stats.max_items /= BTREE_PTR_MAX;
-#endif
- root->root_node = old_root->ptrs[0];
- btree_node_free(old_root);
- root->height--;
-}
-
-static int
-btree_merge_with_prev(
- struct btree_root *root,
- int level,
- struct btree_cursor *prev_cursor)
-{
- if (!prev_cursor)
- return 0;
-
- if (!btree_shift_to_prev(root, level, prev_cursor,
- root->cursor[level].node->num_keys + 1))
- return 0;
-
-#ifdef BTREE_STATS
- root->stats.merge_prev += 1;
-#endif
- return 1;
-}
-
-static int
-btree_merge_with_next(
- struct btree_root *root,
- int level,
- struct btree_cursor *next_cursor)
-{
- if (!next_cursor)
- return 0;
-
- if (!btree_shift_to_next(root, level, next_cursor,
- root->cursor[level].node->num_keys + 1))
- return 0;
-
-#ifdef BTREE_STATS
- root->stats.merge_next += 1;
-#endif
- return 1;
-}
-
-static int
-btree_balance_with_prev(
- struct btree_root *root,
- int level,
- struct btree_cursor *prev_cursor)
-{
- struct btree_cursor *root_cursor = root->cursor;
-
- if (!prev_cursor)
- return 0;
- ASSERT(prev_cursor[level].node->num_keys > BTREE_KEY_MIN);
-
-#ifdef BTREE_STATS
- root->stats.balance_prev += 1;
-#endif
- /*
- * Move some nodes from the prev node into the current node.
- * As the shift operation is a right shift and is relative to
- * the root cursor, make the root cursor the prev cursor and
- * pass in the root cursor as the next cursor.
- */
-
- root->cursor = prev_cursor;
- if (!btree_shift_to_next(root, level, root_cursor,
- (prev_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
- abort();
- root->cursor = root_cursor;
-
- return 1;
-}
-
-static int
-btree_balance_with_next(
- struct btree_root *root,
- int level,
- struct btree_cursor *next_cursor)
-{
- struct btree_cursor *root_cursor = root->cursor;
-
- if (!next_cursor)
- return 0;
-	ASSERT(next_cursor[level].node->num_keys > BTREE_KEY_MIN);
-
-#ifdef BTREE_STATS
-	root->stats.balance_next += 1;
-#endif
-	/*
-	 * Move some nodes from the next node into the current node.
-	 * As the shift operation is a left shift and is relative to
- * the root cursor, make the root cursor the next cursor and
- * pass in the root cursor as the prev cursor.
- */
-
- root->cursor = next_cursor;
- if (!btree_shift_to_prev(root, level, root_cursor,
- (next_cursor[level].node->num_keys + 1 - BTREE_KEY_MIN) / 2))
- abort();
- root->cursor = root_cursor;
-
- return 1;
-
-}
-
-static void
-btree_delete_key(
- struct btree_root *root,
- int level);
-
-/*
- * btree_delete_node:
- *
- * Return 0 if it's done or 1 if the next level needs to be collapsed
- */
-static void
-btree_delete_node(
- struct btree_root *root,
- int level)
-{
- struct btree_cursor prev_cursor[root->height];
- struct btree_cursor next_cursor[root->height];
- struct btree_cursor *pc;
- struct btree_cursor *nc;
-
- /*
- * the node has underflowed, grab or merge keys/items from a
- * neighbouring node.
- */
-
- if (level == root->height - 1) {
- if (level > 0 && root->root_node->num_keys == 0)
- btree_decrease_height(root);
- return;
- }
-
- pc = btree_copy_cursor_prev(root, prev_cursor, level + 1);
- if (!btree_merge_with_prev(root, level, pc)) {
- nc = btree_copy_cursor_next(root, next_cursor, level + 1);
- if (!btree_merge_with_next(root, level, nc)) {
-			/* merging failed, try redistribution */
- if (!btree_balance_with_prev(root, level, pc) &&
- !btree_balance_with_next(root, level, nc))
- abort();
- return; /* when balancing, then the node isn't freed */
- }
- }
-
-#ifdef BTREE_STATS
- root->stats.alloced -= 1;
-#endif
- btree_node_free(root->cursor[level].node);
-
- btree_delete_key(root, level + 1);
-}
-
-static void
-btree_delete_key(
- struct btree_root *root,
- int level)
-{
- struct btree_node *node = root->cursor[level].node;
- int index = root->cursor[level].index;
-
- node->num_keys--;
- if (index <= node->num_keys) {
- /*
- * if not deleting the last item, shift higher items down
- * to cover the item being deleted
- */
- while (index < node->num_keys) {
- node->keys[index] = node->keys[index + 1];
- node->ptrs[index] = node->ptrs[index + 1];
- index++;
- }
- node->ptrs[index] = node->ptrs[index + 1];
- } else {
- /*
- * else update the associated parent key as the last key
- * in the leaf has changed
- */
- btree_update_node_key(root, root->cursor, level + 1,
- node->keys[node->num_keys]);
- }
- /*
- * if node underflows, either merge with sibling or rebalance
- * with sibling.
- */
- if (node->num_keys < BTREE_KEY_MIN)
- btree_delete_node(root, level);
-}
-
-void *
-btree_delete(
- struct btree_root *root,
- unsigned long key)
-{
- void *value;
-
- value = btree_lookup(root, key);
- if (!value)
- return NULL;
-
-#ifdef BTREE_STATS
- root->stats.delete += 1;
- root->stats.num_items -= 1;
-#endif
-
- btree_delete_key(root, 0);
-
- btree_invalidate_cursor(root);
-
- return value;
-}
-
-#ifdef BTREE_STATS
-void
-btree_print_stats(
- struct btree_root *root,
- FILE *f)
-{
- unsigned long max_items = root->stats.max_items *
- (root->root_node->num_keys + 1);
-
- fprintf(f, "\tnum_items = %lu, max_items = %lu (%lu%%)\n",
- root->stats.num_items, max_items,
- root->stats.num_items * 100 / max_items);
- fprintf(f, "\talloced = %d nodes, %lu bytes, %lu bytes per item\n",
- root->stats.alloced,
- root->stats.alloced * sizeof(struct btree_node),
- root->stats.alloced * sizeof(struct btree_node) /
- root->stats.num_items);
- fprintf(f, "\tlookup = %d\n", root->stats.lookup);
- fprintf(f, "\tfind = %d\n", root->stats.find);
- fprintf(f, "\tcache_hits = %d\n", root->stats.cache_hits);
- fprintf(f, "\tcache_misses = %d\n", root->stats.cache_misses);
- fprintf(f, "\tkey_update = %d\n", root->stats.key_update);
- fprintf(f, "\tvalue_update = %d\n", root->stats.value_update);
- fprintf(f, "\tinsert = %d\n", root->stats.insert);
- fprintf(f, "\tshift_prev = %d\n", root->stats.shift_prev);
- fprintf(f, "\tshift_next = %d\n", root->stats.shift_next);
- fprintf(f, "\tsplit = %d\n", root->stats.split);
- fprintf(f, "\tinc_height = %d\n", root->stats.inc_height);
- fprintf(f, "\tdelete = %d\n", root->stats.delete);
- fprintf(f, "\tmerge_prev = %d\n", root->stats.merge_prev);
- fprintf(f, "\tmerge_next = %d\n", root->stats.merge_next);
- fprintf(f, "\tbalance_prev = %d\n", root->stats.balance_prev);
- fprintf(f, "\tbalance_next = %d\n", root->stats.balance_next);
- fprintf(f, "\tdec_height = %d\n", root->stats.dec_height);
-}
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2007 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef _BTREE_H
-#define _BTREE_H
-
-
-struct btree_root;
-
-void
-btree_init(
- struct btree_root **root);
-
-void
-btree_destroy(
- struct btree_root *root);
-
-int
-btree_is_empty(
- struct btree_root *root);
-
-void *
-btree_lookup(
- struct btree_root *root,
- unsigned long key);
-
-void *
-btree_find(
- struct btree_root *root,
- unsigned long key,
- unsigned long *actual_key);
-
-void *
-btree_peek_prev(
- struct btree_root *root,
- unsigned long *key);
-
-void *
-btree_peek_next(
- struct btree_root *root,
- unsigned long *key);
-
-void *
-btree_lookup_next(
- struct btree_root *root,
- unsigned long *key);
-
-void *
-btree_lookup_prev(
- struct btree_root *root,
- unsigned long *key);
-
-int
-btree_insert(
- struct btree_root *root,
- unsigned long key,
- void *value);
-
-void *
-btree_delete(
- struct btree_root *root,
- unsigned long key);
-
-int
-btree_update_key(
- struct btree_root *root,
- unsigned long old_key,
- unsigned long new_key);
-
-int
-btree_update_value(
- struct btree_root *root,
- unsigned long key,
- void *new_value);
-
-void
-btree_clear(
- struct btree_root *root);
-
-#ifdef BTREE_STATS
-void
-btree_print_stats(
- struct btree_root *root,
- FILE *f);
-#endif
-
-#endif /* _BTREE_H */
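
For reference, a minimal sketch (not part of the patch) of the generic btree interface the two deleted files provided, based on the declarations removed above; keys are unsigned long and values are opaque non-NULL pointers:

/* illustrative sketch only -- exercises the interface from the deleted btree.h */
static void
btree_example(void)
{
	struct btree_root	*tree;
	void			*val;

	btree_init(&tree);
	btree_insert(tree, 42, "forty-two");	/* returns EEXIST for a duplicate key */
	val = btree_lookup(tree, 42);		/* exact match -> "forty-two" */
	val = btree_find(tree, 40, NULL);	/* smallest key >= 40, also "forty-two" */
	(void)val;
	btree_delete(tree, 42);			/* returns the stored value, or NULL */
	btree_destroy(tree);
}
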
int i;
int j;
int state;
- xfs_extlen_t blen;
agno = XFS_INO_TO_AGNO(mp, ino);
agino = XFS_INO_TO_AGINO(mp, ino);
pthread_mutex_lock(&ag_locks[agno]);
- state = get_bmap(agno, agbno);
- switch (state) {
+ switch (state = get_agbno_state(mp, agno, agbno)) {
case XR_E_INO:
do_warn(
_("uncertain inode block %d/%d already known\n"),
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
break;
case XR_E_MULT:
case XR_E_INUSE:
do_warn(
_("inode block %d/%d multiply claimed, (state %d)\n"),
agno, agbno, state);
- set_bmap(agno, agbno, XR_E_MULT);
+ set_agbno_state(mp, agno, agbno, XR_E_MULT);
pthread_mutex_unlock(&ag_locks[agno]);
return(0);
default:
do_warn(
_("inode block %d/%d bad state, (state %d)\n"),
agno, agbno, state);
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
break;
}
* entry or an iunlinked pointer
*/
pthread_mutex_lock(&ag_locks[agno]);
- for (cur_agbno = chunk_start_agbno;
- cur_agbno < chunk_stop_agbno;
- cur_agbno += blen) {
- state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
- switch (state) {
+ for (j = 0, cur_agbno = chunk_start_agbno;
+ cur_agbno < chunk_stop_agbno; cur_agbno++) {
+ switch (state = get_agbno_state(mp, agno, cur_agbno)) {
case XR_E_MULT:
case XR_E_INUSE:
case XR_E_INUSE_FS:
do_warn(
_("inode block %d/%d multiply claimed, (state %d)\n"),
agno, cur_agbno, state);
- set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
- pthread_mutex_unlock(&ag_locks[agno]);
- return 0;
+ set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
+ j = 1;
+ break;
case XR_E_INO:
do_error(
_("uncertain inode block overlap, agbno = %d, ino = %llu\n"),
default:
break;
}
+
+ if (j) {
+ pthread_mutex_unlock(&ag_locks[agno]);
+ return(0);
+ }
}
pthread_mutex_unlock(&ag_locks[agno]);
pthread_mutex_lock(&ag_locks[agno]);
for (cur_agbno = chunk_start_agbno;
- cur_agbno < chunk_stop_agbno;
- cur_agbno += blen) {
- state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
- switch (state) {
+ cur_agbno < chunk_stop_agbno; cur_agbno++) {
+ switch (state = get_agbno_state(mp, agno, cur_agbno)) {
case XR_E_INO:
do_error(
_("uncertain inode block %llu already known\n"),
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
- set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
+ set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
break;
case XR_E_MULT:
case XR_E_INUSE:
do_warn(
_("inode block %d/%d bad state, (state %d)\n"),
agno, cur_agbno, state);
- set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
+ set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
break;
}
}
cluster_count * sizeof(xfs_buf_t*));
for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+#ifdef XR_PF_TRACE
pftrace("about to read off %llu in AG %d",
(long long)XFS_AGB_TO_DADDR(mp, agno, agbno), agno);
-
+#endif
bplist[bp_index] = libxfs_readbuf(mp->m_dev,
XFS_AGB_TO_DADDR(mp, agno, agbno),
XFS_FSB_TO_BB(mp, blks_per_cluster), 0);
}
agbno += blks_per_cluster;
+#ifdef XR_PF_TRACE
pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
(long long)XFS_BUF_ADDR(bplist[bp_index]),
XFS_BUF_COUNT(bplist[bp_index]), agno);
+#endif
}
agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
* mark block as an inode block in the incore bitmap
*/
pthread_mutex_lock(&ag_locks[agno]);
- state = get_bmap(agno, agbno);
- switch (state) {
- case XR_E_INO: /* already marked */
- break;
- case XR_E_UNKNOWN:
- case XR_E_FREE:
- case XR_E_FREE1:
- set_bmap(agno, agbno, XR_E_INO);
- break;
- case XR_E_BAD_STATE:
- do_error(_("bad state in block map %d\n"), state);
- break;
- default:
- set_bmap(agno, agbno, XR_E_MULT);
- do_warn(_("inode block %llu multiply claimed, state was %d\n"),
- XFS_AGB_TO_FSB(mp, agno, agbno), state);
- break;
+ switch (state = get_agbno_state(mp, agno, agbno)) {
+ case XR_E_INO: /* already marked */
+ break;
+ case XR_E_UNKNOWN:
+ case XR_E_FREE:
+ case XR_E_FREE1:
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
+ break;
+ case XR_E_BAD_STATE:
+ do_error(_("bad state in block map %d\n"), state);
+ break;
+ default:
+ set_agbno_state(mp, agno, agbno, XR_E_MULT);
+ do_warn(_("inode block %llu multiply claimed, state was %d\n"),
+ XFS_AGB_TO_FSB(mp, agno, agbno), state);
+ break;
}
pthread_mutex_unlock(&ag_locks[agno]);
* done! - finished up irec and block simultaneously
*/
for (bp_index = 0; bp_index < cluster_count; bp_index++) {
- pftrace("put/writebuf %p (%llu) in AG %d",
- bplist[bp_index], (long long)
- XFS_BUF_ADDR(bplist[bp_index]), agno);
-
+#ifdef XR_PF_TRACE
+ pftrace("put/writebuf %p (%llu) in AG %d", bplist[bp_index],
+ (long long)XFS_BUF_ADDR(bplist[bp_index]), agno);
+#endif
if (dirty && !no_modify)
libxfs_writebuf(bplist[bp_index], 0);
else
agbno++;
pthread_mutex_lock(&ag_locks[agno]);
- state = get_bmap(agno, agbno);
- switch (state) {
+ switch (state = get_agbno_state(mp, agno, agbno)) {
case XR_E_INO: /* already marked */
break;
case XR_E_UNKNOWN:
case XR_E_FREE:
case XR_E_FREE1:
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
break;
case XR_E_BAD_STATE:
do_error(_("bad state in block map %d\n"),
state);
break;
default:
- set_bmap(agno, agbno, XR_E_MULT);
+ set_agbno_state(mp, agno, agbno, XR_E_MULT);
do_warn(_("inode block %llu multiply claimed, "
"state was %d\n"),
XFS_AGB_TO_FSB(mp, agno, agbno), state);
/*
* set the appropriate number of extents
- * this iterates block by block, this can be optimised using extents
*/
for (b = irec->br_startblock; b < irec->br_startblock +
irec->br_blockcount; b += mp->m_sb.sb_rextsize) {
continue;
}
- state = get_rtbmap(ext);
+ state = get_rtbno_state(mp, ext);
+
switch (state) {
- case XR_E_FREE:
- case XR_E_UNKNOWN:
- set_rtbmap(ext, XR_E_INUSE);
- break;
- case XR_E_BAD_STATE:
- do_error(_("bad state in rt block map %llu\n"), ext);
- case XR_E_FS_MAP:
- case XR_E_INO:
- case XR_E_INUSE_FS:
- do_error(_("data fork in rt inode %llu found "
- "metadata block %llu in rt bmap\n"),
- ino, ext);
- case XR_E_INUSE:
- if (pwe)
+ case XR_E_FREE:
+ case XR_E_UNKNOWN:
+ set_rtbno_state(mp, ext, XR_E_INUSE);
break;
- case XR_E_MULT:
- set_rtbmap(ext, XR_E_MULT);
- do_warn(_("data fork in rt inode %llu claims "
- "used rt block %llu\n"),
+
+ case XR_E_BAD_STATE:
+ do_error(_("bad state in rt block map %llu\n"),
+ ext);
+
+ case XR_E_FS_MAP:
+ case XR_E_INO:
+ case XR_E_INUSE_FS:
+ do_error(_("data fork in rt inode %llu found "
+ "metadata block %llu in rt bmap\n"),
ino, ext);
- return 1;
- case XR_E_FREE1:
- default:
- do_error(_("illegal state %d in rt block map "
- "%llu\n"), state, b);
+
+ case XR_E_INUSE:
+ if (pwe)
+ break;
+
+ case XR_E_MULT:
+ set_rtbno_state(mp, ext, XR_E_MULT);
+ do_warn(_("data fork in rt inode %llu claims "
+ "used rt block %llu\n"),
+ ino, ext);
+ return 1;
+
+ case XR_E_FREE1:
+ default:
+ do_error(_("illegal state %d in rt block map "
+ "%llu\n"), state, b);
}
}
char *forkname;
int i;
int state;
+ xfs_dfsbno_t e;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
- xfs_agblock_t ebno;
- xfs_extlen_t blen;
xfs_agnumber_t locked_agno = -1;
int error = 1;
*/
agno = XFS_FSB_TO_AGNO(mp, irec.br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, irec.br_startblock);
- ebno = agbno + irec.br_blockcount;
+ e = irec.br_startblock + irec.br_blockcount;
if (agno != locked_agno) {
if (locked_agno != -1)
pthread_mutex_unlock(&ag_locks[locked_agno]);
* checking each entry without setting the
* block bitmap
*/
- if (search_dup_extent(agno, agbno, ebno)) {
- do_warn(_("%s fork in ino %llu claims "
- "dup extent, off - %llu, "
- "start - %llu, cnt %llu\n"),
- forkname, ino, irec.br_startoff,
- irec.br_startblock,
- irec.br_blockcount);
- goto done;
+ for (b = irec.br_startblock; b < e; b++, agbno++) {
+ if (search_dup_extent(mp, agno, agbno)) {
+ do_warn(_("%s fork in ino %llu claims "
+ "dup extent, off - %llu, "
+ "start - %llu, cnt %llu\n"),
+ forkname, ino, irec.br_startoff,
+ irec.br_startblock,
+ irec.br_blockcount);
+ goto done;
+ }
}
*tot += irec.br_blockcount;
continue;
}
- for (b = irec.br_startblock;
- agbno < ebno;
- b += blen, agbno += blen) {
- state = get_bmap_ext(agno, agbno, ebno, &blen);
+ for (b = irec.br_startblock; b < e; b++, agbno++) {
+ /*
+ * Process in chunks of 16 (XR_BB_UNIT/XR_BB)
+ * for common XR_E_UNKNOWN to XR_E_INUSE transition
+ */
+ if (((agbno & XR_BB_MASK) == 0) && ((irec.br_startblock + irec.br_blockcount - b) >= (XR_BB_UNIT/XR_BB))) {
+ if (ba_bmap[agno][agbno>>XR_BB] == XR_E_UNKNOWN_LL) {
+ ba_bmap[agno][agbno>>XR_BB] = XR_E_INUSE_LL;
+ agbno += (XR_BB_UNIT/XR_BB) - 1;
+ b += (XR_BB_UNIT/XR_BB) - 1;
+ continue;
+ }
+
+ }
+
+ state = get_agbno_state(mp, agno, agbno);
+
switch (state) {
case XR_E_FREE:
case XR_E_FREE1:
forkname, ino, (__uint64_t) b);
/* fall through ... */
case XR_E_UNKNOWN:
- set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
+ set_agbno_state(mp, agno, agbno, XR_E_INUSE);
break;
case XR_E_BAD_STATE:
case XR_E_INUSE:
case XR_E_MULT:
- set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+ set_agbno_state(mp, agno, agbno, XR_E_MULT);
do_warn(_("%s fork in %s inode %llu claims "
"used block %llu\n"),
forkname, ftype, ino, (__uint64_t) b);
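
The chunked path restored above depends on how the per-AG block-state bitmap packs its states: with XR_BB bits of state per filesystem block, one __uint64_t word holds XR_BB_UNIT/XR_BB states (the "chunks of 16" in the comment), so a whole word of XR_E_UNKNOWN can be switched to XR_E_INUSE with a single store. A rough sketch of per-block accessors under that assumption (the real get_agbno_state()/set_agbno_state() macros live in incore.h and may differ in detail):

/* illustrative sketch only -- assumes 4-bit block states packed 16 per 64-bit word */
#include <stdint.h>

#define EX_BITS		4			/* hypothetical stand-in for XR_BB */
#define EX_PER_WORD	(64 / EX_BITS)		/* 16 states per word, as in the comment above */

static inline int
example_get_state(uint64_t *bmap, unsigned long agbno)
{
	return (int)((bmap[agbno / EX_PER_WORD] >>
		     ((agbno % EX_PER_WORD) * EX_BITS)) & 0xfULL);
}

static inline void
example_set_state(uint64_t *bmap, unsigned long agbno, int state)
{
	int	shift = (int)((agbno % EX_PER_WORD) * EX_BITS);

	bmap[agbno / EX_PER_WORD] &= ~(0xfULL << shift);
	bmap[agbno / EX_PER_WORD] |= (uint64_t)state << shift;
}
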
*nextents = 1;
if (dinoc->di_format != XFS_DINODE_FMT_LOCAL && type != XR_INO_RTDATA)
- *dblkmap = blkmap_alloc(*nextents, XFS_DATA_FORK);
+ *dblkmap = blkmap_alloc(*nextents);
*nextents = 0;
switch (dinoc->di_format) {
err = process_lclinode(mp, agno, ino, dino, XFS_ATTR_FORK);
break;
case XFS_DINODE_FMT_EXTENTS:
- ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
+ ablkmap = blkmap_alloc(*anextents);
*anextents = 0;
err = process_exinode(mp, agno, ino, dino, type, dirty,
atotblocks, anextents, &ablkmap,
XFS_ATTR_FORK, check_dups);
break;
case XFS_DINODE_FMT_BTREE:
- ablkmap = blkmap_alloc(*anextents, XFS_ATTR_FORK);
+ ablkmap = blkmap_alloc(*anextents);
*anextents = 0;
err = process_btinode(mp, agno, ino, dino, type, dirty,
atotblocks, anextents, &ablkmap,
bplist = bparray;
}
for (i = 0; i < nex; i++) {
+#ifdef XR_PF_TRACE
pftrace("about to read off %llu (len = %d)",
(long long)XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
XFS_FSB_TO_BB(mp, bmp[i].blockcount));
-
+#endif
bplist[i] = libxfs_readbuf(mp->m_dev,
XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
if (!bplist[i])
goto failed;
-
+#ifdef XR_PF_TRACE
pftrace("readbuf %p (%llu, %d)", bplist[i],
(long long)XFS_BUF_ADDR(bplist[i]),
XFS_BUF_COUNT(bplist[i]));
+#endif
}
dabuf = malloc(XFS_DA_BUF_SIZE(nex));
if (dabuf == NULL) {
}
da_buf_done(dabuf);
for (i = 0; i < nbuf; i++) {
+#ifdef XR_PF_TRACE
pftrace("putbuf %p (%llu)", bplist[i],
(long long)XFS_BUF_ADDR(bplist[i]));
+#endif
libxfs_putbuf(bplist[i]);
}
if (bplist != &bp)
/*
* bail out if this is the root block (top of tree)
*/
- if (this_level >= cursor->active)
+ if (this_level >= cursor->active)
return(0);
/*
	 * set hashvalue to correctly reflect the now-validated
* numbers. Do NOT touch the name until after we've computed
* the hashvalue and done a namecheck() on the name.
*
- * Conditions must either set clearino to zero or set
+ * Conditions must either set clearino to zero or set
* clearreason why it's being cleared.
*/
if (!ino_discovery && ent_ino == BADFSINO) {
if (ino_discovery) {
add_inode_uncertain(mp, ent_ino, 0);
clearino = 0;
- } else
+ } else
clearreason = _("non-existent");
} else {
/*
EXTERN int max_symlink_blocks;
EXTERN __int64_t fs_max_file_offset;
+/* block allocation bitmaps */
+
+EXTERN __uint64_t **ba_bmap; /* see incore.h */
+EXTERN __uint64_t *rt_ba_bmap; /* see incore.h */
+
/* realtime info */
EXTERN xfs_rtword_t *btmcompute;
EXTERN int report_interval;
EXTERN __uint64_t *prog_rpt_done;
+#ifdef XR_PF_TRACE
+EXTERN FILE *pf_trace_file;
+#endif
+
EXTERN int ag_stride;
EXTERN int thread_count;
#include <libxfs.h>
#include "avl.h"
-#include "btree.h"
#include "globals.h"
#include "incore.h"
#include "agheader.h"
#include "err_protos.h"
#include "threads.h"
-/*
- * The following manages the in-core bitmap of the entire filesystem
- * using extents in a btree.
- *
- * The btree items will point to one of the state values below,
- * rather than storing the value itself in the pointer.
- */
-static int states[16] =
- {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+/* ba bmap setup stuff. setting/getting state is in incore.h */
-static struct btree_root **ag_bmap;
-
-static void
-update_bmap(
- struct btree_root *bmap,
- unsigned long offset,
- xfs_extlen_t blen,
- void *new_state)
+void
+setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
{
- unsigned long end = offset + blen;
- int *cur_state;
- unsigned long cur_key;
- int *next_state;
- unsigned long next_key;
- int *prev_state;
-
- cur_state = btree_find(bmap, offset, &cur_key);
- if (!cur_state)
- return;
+ int i;
+ size_t size = 0;
- if (offset == cur_key) {
- /* if the start is the same as the "item" extent */
- if (cur_state == new_state)
- return;
-
- /*
- * Note: this may be NULL if we are updating the map for
- * the superblock.
- */
- prev_state = btree_peek_prev(bmap, NULL);
-
- next_state = btree_peek_next(bmap, &next_key);
- if (next_key > end) {
- /* different end */
- if (new_state == prev_state) {
- /* #1: prev has same state, move offset up */
- btree_update_key(bmap, offset, end);
- return;
- }
-
- /* #4: insert new extent after, update current value */
- btree_update_value(bmap, offset, new_state);
- btree_insert(bmap, end, cur_state);
- return;
- }
+ ba_bmap = (__uint64_t**)malloc(agno*sizeof(__uint64_t *));
+ if (!ba_bmap)
+ do_error(_("couldn't allocate block map pointers\n"));
+ ag_locks = malloc(agno * sizeof(pthread_mutex_t));
+ if (!ag_locks)
+ do_error(_("couldn't allocate block map locks\n"));
- /* same end (and same start) */
- if (new_state == next_state) {
- /* next has same state */
- if (new_state == prev_state) {
- /* #3: merge prev & next */
- btree_delete(bmap, offset);
- btree_delete(bmap, end);
- return;
- }
-
- /* #8: merge next */
- btree_update_value(bmap, offset, new_state);
- btree_delete(bmap, end);
- return;
- }
+ for (i = 0; i < agno; i++) {
+ size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB),
+ sizeof(__uint64_t));
- /* same start, same end, next has different state */
- if (new_state == prev_state) {
- /* #5: prev has same state */
- btree_delete(bmap, offset);
+ ba_bmap[i] = (__uint64_t*)memalign(sizeof(__uint64_t), size);
+ if (!ba_bmap[i]) {
+ do_error(_("couldn't allocate block map, size = %d\n"),
+ numblocks);
return;
}
+ memset(ba_bmap[i], 0, size);
+ pthread_mutex_init(&ag_locks[i], NULL);
+ }
- /* #6: update value only */
- btree_update_value(bmap, offset, new_state);
+ if (rtblocks == 0) {
+ rt_ba_bmap = NULL;
return;
}
- /* different start, offset is in the middle of "cur" */
- prev_state = btree_peek_prev(bmap, NULL);
- ASSERT(prev_state != NULL);
- if (prev_state == new_state)
- return;
+ size = roundup(rtblocks / (NBBY/XR_BB), sizeof(__uint64_t));
- if (end == cur_key) {
- /* end is at the same point as the current extent */
- if (new_state == cur_state) {
- /* #7: move next extent down */
- btree_update_key(bmap, end, offset);
+ rt_ba_bmap=(__uint64_t*)memalign(sizeof(__uint64_t), size);
+ if (!rt_ba_bmap) {
+ do_error(
+ _("couldn't allocate realtime block map, size = %llu\n"),
+ rtblocks);
return;
- }
-
- /* #9: different start, same end, add new extent */
- btree_insert(bmap, offset, new_state);
- return;
}
- /* #2: insert an extent into the middle of another extent */
- btree_insert(bmap, offset, new_state);
- btree_insert(bmap, end, prev_state);
+ /*
+ * start all real-time as free blocks
+ */
+ set_bmap_rt(rtblocks);
+
+ return;
}
+/* ARGSUSED */
void
-set_bmap_ext(
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- xfs_extlen_t blen,
- int state)
+teardown_rt_bmap(xfs_mount_t *mp)
{
- update_bmap(ag_bmap[agno], agbno, blen, &states[state]);
+ if (rt_ba_bmap != NULL) {
+ free(rt_ba_bmap);
+ rt_ba_bmap = NULL;
+ }
+
+ return;
}
-int
-get_bmap_ext(
- xfs_agnumber_t agno,
- xfs_agblock_t agbno,
- xfs_agblock_t maxbno,
- xfs_extlen_t *blen)
+/* ARGSUSED */
+void
+teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno)
{
- int *statep;
- unsigned long key;
-
- statep = btree_find(ag_bmap[agno], agbno, &key);
- if (!statep)
- return -1;
-
- if (key == agbno) {
- if (blen) {
- if (!btree_peek_next(ag_bmap[agno], &key))
- return -1;
- *blen = MIN(maxbno, key) - agbno;
- }
- return *statep;
- }
+ ASSERT(ba_bmap[agno] != NULL);
- statep = btree_peek_prev(ag_bmap[agno], NULL);
- if (!statep)
- return -1;
- if (blen)
- *blen = MIN(maxbno, key) - agbno;
+ free(ba_bmap[agno]);
+ ba_bmap[agno] = NULL;
- return *statep;
+ return;
}
-static uint64_t *rt_bmap;
-static size_t rt_bmap_size;
-
-/* block records fit into __uint64_t's units */
-#define XR_BB_UNIT 64 /* number of bits/unit */
-#define XR_BB 4 /* bits per block record */
-#define XR_BB_NUM (XR_BB_UNIT/XR_BB) /* number of records per unit */
-#define XR_BB_MASK 0xF /* block record mask */
-
-/*
- * these work in real-time extents (e.g. fsbno == rt extent number)
- */
-int
-get_rtbmap(
- xfs_drtbno_t bno)
+/* ARGSUSED */
+void
+teardown_bmap_finish(xfs_mount_t *mp)
{
- return (*(rt_bmap + bno / XR_BB_NUM) >>
- ((bno % XR_BB_NUM) * XR_BB)) & XR_BB_MASK;
+ free(ba_bmap);
+ ba_bmap = NULL;
+
+ return;
}
void
-set_rtbmap(
- xfs_drtbno_t bno,
- int state)
+teardown_bmap(xfs_mount_t *mp)
{
- *(rt_bmap + bno / XR_BB_NUM) =
- ((*(rt_bmap + bno / XR_BB_NUM) &
- (~((__uint64_t) XR_BB_MASK << ((bno % XR_BB_NUM) * XR_BB)))) |
- (((__uint64_t) state) << ((bno % XR_BB_NUM) * XR_BB)));
+ xfs_agnumber_t i;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ teardown_ag_bmap(mp, i);
+ }
+
+ teardown_rt_bmap(mp);
+ teardown_bmap_finish(mp);
+
+ return;
}
-static void
-reset_rt_bmap(void)
+/*
+ * block map initialization routines -- realtime, log, fs
+ */
+void
+set_bmap_rt(xfs_drtbno_t num)
{
- if (rt_bmap)
- memset(rt_bmap, 0x22, rt_bmap_size); /* XR_E_FREE */
+ xfs_drtbno_t j;
+ xfs_drtbno_t size;
+
+ /*
+ * for now, initialize all realtime blocks to be free
+ * (state == XR_E_FREE)
+ */
+ size = howmany(num / (NBBY/XR_BB), sizeof(__uint64_t));
+
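+ /*
+ * 0x2 packed into each 4-bit record corresponds to XR_E_FREE, i.e.
+ * sixteen free-block records per 64-bit word (the byte-wise
+ * equivalent 0x22 appears in the removed reset_rt_bmap()).
+ */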
+ for (j = 0; j < size; j++)
+ rt_ba_bmap[j] = 0x2222222222222222LL;
+
+ return;
}
-static void
-init_rt_bmap(
- xfs_mount_t *mp)
+void
+set_bmap_log(xfs_mount_t *mp)
{
- if (mp->m_sb.sb_rextents == 0)
+ xfs_dfsbno_t logend, i;
+
+ if (mp->m_sb.sb_logstart == 0)
return;
- rt_bmap_size = roundup(mp->m_sb.sb_rextents / (NBBY / XR_BB),
- sizeof(__uint64_t));
+ logend = mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks;
- rt_bmap = memalign(sizeof(__uint64_t), rt_bmap_size);
- if (!rt_bmap) {
- do_error(
- _("couldn't allocate realtime block map, size = %llu\n"),
- mp->m_sb.sb_rextents);
- return;
+ for (i = mp->m_sb.sb_logstart; i < logend ; i++) {
+ set_fsbno_state(mp, i, XR_E_INUSE_FS);
}
+
+ return;
}
-static void
-free_rt_bmap(xfs_mount_t *mp)
+void
+set_bmap_fs(xfs_mount_t *mp)
{
- free(rt_bmap);
- rt_bmap = NULL;
-}
+ xfs_agnumber_t i;
+ xfs_agblock_t j;
+ xfs_agblock_t end;
+
+ /*
+ * AG header is 4 sectors
+ */
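+ /*
+ * e.g. with 512-byte sectors and 4096-byte blocks (illustrative
+ * values only) this rounds up to a single block per AG.
+ */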
+ end = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ for (j = 0; j < end; j++)
+ set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+ return;
+}
+#if 0
void
-reset_bmaps(xfs_mount_t *mp)
+set_bmap_fs_bt(xfs_mount_t *mp)
{
- xfs_agnumber_t agno;
- xfs_agblock_t ag_size;
- int ag_hdr_block;
-
- ag_hdr_block = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
- ag_size = mp->m_sb.sb_agblocks;
-
- for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- if (agno == mp->m_sb.sb_agcount - 1)
- ag_size = (xfs_extlen_t)(mp->m_sb.sb_dblocks -
- (xfs_drfsbno_t)mp->m_sb.sb_agblocks * agno);
-#ifdef BTREE_STATS
- if (btree_find(ag_bmap[agno], 0, NULL)) {
- printf("ag_bmap[%d] btree stats:\n", i);
- btree_print_stats(ag_bmap[agno], stdout);
- }
-#endif
+ xfs_agnumber_t i;
+ xfs_agblock_t j;
+ xfs_agblock_t begin;
+ xfs_agblock_t end;
+
+ begin = bnobt_root;
+ end = inobt_root + 1;
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
/*
- * We always insert an item for the first block having a
- * given state. So the code below means:
- *
- * block 0..ag_hdr_block-1: XR_E_INUSE_FS
- * ag_hdr_block..ag_size: XR_E_UNKNOWN
- * ag_size... XR_E_BAD_STATE
+ * account for btree roots
*/
- btree_clear(ag_bmap[agno]);
- btree_insert(ag_bmap[agno], 0, &states[XR_E_INUSE_FS]);
- btree_insert(ag_bmap[agno],
- ag_hdr_block, &states[XR_E_UNKNOWN]);
- btree_insert(ag_bmap[agno], ag_size, &states[XR_E_BAD_STATE]);
+ for (j = begin; j < end; j++)
+ set_agbno_state(mp, i, j, XR_E_INUSE_FS);
}
- if (mp->m_sb.sb_logstart != 0) {
- set_bmap_ext(XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart),
- XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
- mp->m_sb.sb_logblocks, XR_E_INUSE_FS);
- }
-
- reset_rt_bmap();
+ return;
}
+#endif
void
-init_bmaps(xfs_mount_t *mp)
+incore_init(xfs_mount_t *mp)
{
- xfs_agnumber_t i;
+ int agcount = mp->m_sb.sb_agcount;
+ extern void incore_ino_init(xfs_mount_t *);
+ extern void incore_ext_init(xfs_mount_t *);
- ag_bmap = calloc(mp->m_sb.sb_agcount, sizeof(struct btree_root *));
- if (!ag_bmap)
- do_error(_("couldn't allocate block map btree roots\n"));
+ /* init block alloc bmap */
- ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t));
- if (!ag_locks)
- do_error(_("couldn't allocate block map locks\n"));
+ setup_bmap(agcount, mp->m_sb.sb_agblocks, mp->m_sb.sb_rextents);
+ incore_ino_init(mp);
+ incore_ext_init(mp);
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- btree_init(&ag_bmap[i]);
- pthread_mutex_init(&ag_locks[i], NULL);
- }
+ /* initialize random globals now that we know the fs geometry */
- init_rt_bmap(mp);
- reset_bmaps(mp);
+ inodes_per_block = mp->m_sb.sb_inopblock;
+
+ return;
}
-void
-free_bmaps(xfs_mount_t *mp)
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+int
+get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+ xfs_agblock_t ag_blockno)
{
- xfs_agnumber_t i;
+ __uint64_t *addr;
- for (i = 0; i < mp->m_sb.sb_agcount; i++)
- btree_destroy(ag_bmap[i]);
- free(ag_bmap);
- ag_bmap = NULL;
+ addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM;
- free_rt_bmap(mp);
+ return((*addr >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK);
}
+
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+ xfs_agblock_t ag_blockno, int state)
+{
+ __uint64_t *addr;
+
+ addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM;
+
+ *addr = (((*addr) &
+ (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) |
+ (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)));
+}
+
+int
+get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno)
+{
+ return(get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno),
+ XFS_FSB_TO_AGBNO(mp, blockno)));
+}
+
+void
+set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state)
+{
+ set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno),
+ XFS_FSB_TO_AGBNO(mp, blockno), state);
+
+ return;
+}
+#endif
#define XFS_REPAIR_INCORE_H
#include "avl.h"
-
-
/*
* contains definition information. implementation (code)
* is spread out in separate files.
*/
/*
- * block map -- track state of each filesystem block.
+ * block bit map defs -- track state of each filesystem block.
+ * ba_bmap is an array of bitstrings declared in the globals.h file.
+ * the bitstrings are broken up into 64-bit chunks. one bitstring per AG.
*/
+#define BA_BMAP_SIZE(x) (howmany(x, 4))
-void init_bmaps(xfs_mount_t *mp);
-void reset_bmaps(xfs_mount_t *mp);
-void free_bmaps(xfs_mount_t *mp);
+void set_bmap_rt(xfs_drfsbno_t numblocks);
+void set_bmap_log(xfs_mount_t *mp);
+void set_bmap_fs(xfs_mount_t *mp);
+void teardown_bmap(xfs_mount_t *mp);
-void set_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_extlen_t blen, int state);
-int get_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_agblock_t maxbno, xfs_extlen_t *blen);
+void teardown_rt_bmap(xfs_mount_t *mp);
+void teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno);
+void teardown_bmap_finish(xfs_mount_t *mp);
-void set_rtbmap(xfs_drtbno_t bno, int state);
-int get_rtbmap(xfs_drtbno_t bno);
+/* blocks are numbered from zero */
-static inline void
-set_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno, int state)
-{
- set_bmap_ext(agno, agbno, 1, state);
-}
+/* block records fit into __uint64_t's units */
+
+#define XR_BB_UNIT 64 /* number of bits/unit */
+#define XR_BB 4 /* bits per block record */
+#define XR_BB_NUM (XR_BB_UNIT/XR_BB) /* number of records per unit */
+#define XR_BB_MASK 0xF /* block record mask */
+
+/*
+ * bitstring ops -- set/get block states, either in filesystem
+ * bno's or in agbno's. turns out that fsbno addressing is
+ * more convenient when dealing with bmap extracted addresses
+ * and agbno addressing is more convenient when dealing with
+ * meta-data extracted addresses. So the fsbno versions use
+ * mtype (which can be one of the block map types above) to
+ * set the correct block map while the agbno versions assume
+ * you want to use the regular block map.
+ */
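+
+/*
+ * Usage sketch (illustrative only):
+ *
+ *	set_agbno_state(mp, agno, agbno, XR_E_INUSE_FS);
+ *	state = get_agbno_state(mp, agno, agbno);
+ *
+ * The fsbno variants do the same after converting the filesystem block
+ * number via XFS_FSB_TO_AGNO()/XFS_FSB_TO_AGBNO().
+ */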
+
+#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
+/*
+ * implemented as functions for debugging purposes
+ */
+int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+ xfs_agblock_t ag_blockno);
+void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
+ xfs_agblock_t ag_blockno, int state);
+
+int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno);
+void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state);
+#else
+/*
+ * implemented as macros for performance purposes
+ */
+
+#define get_agbno_state(mp, agno, ag_blockno) \
+ ((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \
+ >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
+ & XR_BB_MASK)
+#define set_agbno_state(mp, agno, ag_blockno, state) \
+ *(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
+ ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
+ (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
+ (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)))
+
+#define get_fsbno_state(mp, blockno) \
+ get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+ XFS_FSB_TO_AGBNO(mp, (blockno)))
+#define set_fsbno_state(mp, blockno, state) \
+ set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
+ XFS_FSB_TO_AGBNO(mp, (blockno)), (state))
+
+
+#define get_agbno_rec(mp, agno, ag_blockno) \
+ (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM))
+#endif /* XR_BMAP_TRACE */
+
+/*
+ * these work in real-time extents (e.g. fsbno == rt extent number)
+ */
+#define get_rtbno_state(mp, fsbno) \
+ ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
+ (((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)
+#define set_rtbno_state(mp, fsbno, state) \
+ *(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
+ ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
+ (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
+ (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB)))
-static inline int
-get_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno)
-{
- return get_bmap_ext(agno, agbno, agbno + 1, NULL);
-}
/*
* extent tree definitions
/*
* duplicate extent tree functions
*/
+void add_dup_extent(xfs_agnumber_t agno,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount);
+
+extern avltree_desc_t **extent_tree_ptrs;
+/* ARGSUSED */
+static inline int
+search_dup_extent(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
+{
+ ASSERT(agno < glob_agcount);
+
+ if (avl_findrange(extent_tree_ptrs[agno], agbno) != NULL)
+ return(1);
+
+ return(0);
+}
-int add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
- xfs_extlen_t blockcount);
-int search_dup_extent(xfs_agnumber_t agno,
- xfs_agblock_t start_agbno, xfs_agblock_t end_agbno);
void add_rt_dup_extent(xfs_drtbno_t startblock,
xfs_extlen_t blockcount);
*/
void free_rt_dup_extent_tree(xfs_mount_t *mp);
-void incore_ext_init(xfs_mount_t *);
/*
* per-AG extent trees shutdown routine -- all (bno, bcnt and dup)
* at once. this one actually frees the memory instead of just recyling
*/
void incore_ext_teardown(xfs_mount_t *mp);
-void incore_ino_init(xfs_mount_t *);
-
/*
* inode definitions
*/
#include <libxfs.h>
#include "avl.h"
-#include "btree.h"
#include "globals.h"
#include "incore.h"
#include "agheader.h"
static avl64tree_desc_t *rt_ext_tree_ptr; /* dup extent tree for rt */
-static struct btree_root **dup_extent_trees; /* per ag dup extent trees */
-
+avltree_desc_t **extent_tree_ptrs; /* array of extent tree ptrs */
+ /* one per ag for dups */
static avltree_desc_t **extent_bno_ptrs; /*
* array of extent tree ptrs
* one per ag for free extents
static pthread_mutex_t rt_ext_tree_lock;
static pthread_mutex_t rt_ext_flist_lock;
-/*
- * duplicate extent tree functions
- */
-
-void
-release_dup_extent_tree(
- xfs_agnumber_t agno)
-{
- btree_clear(dup_extent_trees[agno]);
-}
-
-int
-add_dup_extent(
- xfs_agnumber_t agno,
- xfs_agblock_t startblock,
- xfs_extlen_t blockcount)
-{
-#ifdef XR_DUP_TRACE
- fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock,
- blockcount);
-#endif
- return btree_insert(dup_extent_trees[agno], startblock,
- (void *)(uintptr_t)(startblock + blockcount));
-}
-
-int
-search_dup_extent(
- xfs_agnumber_t agno,
- xfs_agblock_t start_agbno,
- xfs_agblock_t end_agbno)
-{
- unsigned long bno;
-
- if (!btree_find(dup_extent_trees[agno], start_agbno, &bno))
- return 0; /* this really shouldn't happen */
- if (bno < end_agbno)
- return 1;
- return (uintptr_t)btree_peek_prev(dup_extent_trees[agno], NULL) >
- start_agbno;
-}
-
-
/*
* extent tree stuff is avl trees of duplicate extents,
* sorted in order by block number. there is one tree per ag.
/*
* top-level (visible) routines
*/
+void
+release_dup_extent_tree(xfs_agnumber_t agno)
+{
+ release_extent_tree(extent_tree_ptrs[agno]);
+
+ return;
+}
+
void
release_agbno_extent_tree(xfs_agnumber_t agno)
{
return(ext);
}
+/*
+ * the next 2 routines manage the trees of duplicate extents -- 1 tree
+ * per AG
+ */
+void
+add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock,
+ xfs_extlen_t blockcount)
+{
+ extent_tree_node_t *first, *last, *ext, *next_ext;
+ xfs_agblock_t new_startblock;
+ xfs_extlen_t new_blockcount;
+
+ ASSERT(agno < glob_agcount);
+
+#ifdef XR_DUP_TRACE
+ fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock, blockcount);
+#endif
+ avl_findranges(extent_tree_ptrs[agno], startblock - 1,
+ startblock + blockcount + 1,
+ (avlnode_t **) &first, (avlnode_t **) &last);
+ /*
+ * find adjacent and overlapping extent blocks
+ */
+ if (first == NULL && last == NULL) {
+ /* nothing, just make and insert new extent */
+
+ ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_MULT);
+
+ if (avl_insert(extent_tree_ptrs[agno],
+ (avlnode_t *) ext) == NULL) {
+ do_error(_("duplicate extent range\n"));
+ }
+
+ return;
+ }
+
+ ASSERT(first != NULL && last != NULL);
+
+ /*
+ * find the new composite range, delete old extent nodes
+ * as we go
+ */
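+ /*
+ * e.g. (illustrative) merging a new range [10, +5) with an existing
+ * [12, +4) node yields a single [10, +6) duplicate extent.
+ */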
+ new_startblock = startblock;
+ new_blockcount = blockcount;
+
+ for (ext = first;
+ ext != (extent_tree_node_t *) last->avl_node.avl_nextino;
+ ext = next_ext) {
+ /*
+ * preserve the next inorder node
+ */
+ next_ext = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+ /*
+ * just bail if the new extent is contained within an old one
+ */
+ if (ext->ex_startblock <= startblock &&
+ ext->ex_blockcount >= blockcount)
+ return;
+ /*
+ * now check for overlaps and adjacent extents
+ */
+ if (ext->ex_startblock + ext->ex_blockcount >= startblock
+ || ext->ex_startblock <= startblock + blockcount) {
+
+ if (ext->ex_startblock < new_startblock)
+ new_startblock = ext->ex_startblock;
+
+ if (ext->ex_startblock + ext->ex_blockcount >
+ new_startblock + new_blockcount)
+ new_blockcount = ext->ex_startblock +
+ ext->ex_blockcount -
+ new_startblock;
+
+ avl_delete(extent_tree_ptrs[agno], (avlnode_t *) ext);
+ continue;
+ }
+ }
+
+ ext = mk_extent_tree_nodes(new_startblock, new_blockcount, XR_E_MULT);
+
+ if (avl_insert(extent_tree_ptrs[agno], (avlnode_t *) ext) == NULL) {
+ do_error(_("duplicate extent range\n"));
+ }
+
+ return;
+}
+
static __psunsigned_t
avl_ext_start(avlnode_t *node)
{
pthread_mutex_init(&rt_ext_tree_lock, NULL);
pthread_mutex_init(&rt_ext_flist_lock, NULL);
- dup_extent_trees = calloc(agcount, sizeof(struct btree_root *));
- if (!dup_extent_trees)
- do_error(_("couldn't malloc dup extent tree descriptor table\n"));
+ if ((extent_tree_ptrs = malloc(agcount *
+ sizeof(avltree_desc_t *))) == NULL)
+ do_error(
+ _("couldn't malloc dup extent tree descriptor table\n"));
if ((extent_bno_ptrs = malloc(agcount *
sizeof(avltree_desc_t *))) == NULL)
_("couldn't malloc free by-bcnt extent tree descriptor table\n"));
for (i = 0; i < agcount; i++) {
+ if ((extent_tree_ptrs[i] =
+ malloc(sizeof(avltree_desc_t))) == NULL)
+ do_error(
+ _("couldn't malloc dup extent tree descriptor\n"));
if ((extent_bno_ptrs[i] =
malloc(sizeof(avltree_desc_t))) == NULL)
do_error(
}
for (i = 0; i < agcount; i++) {
- btree_init(&dup_extent_trees[i]);
+ avl_init_tree(extent_tree_ptrs[i], &avl_extent_tree_ops);
avl_init_tree(extent_bno_ptrs[i], &avl_extent_tree_ops);
avl_init_tree(extent_bcnt_ptrs[i], &avl_extent_bcnt_tree_ops);
}
free(cur);
for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- btree_destroy(dup_extent_trees[i]);
+ free(extent_tree_ptrs[i]);
free(extent_bno_ptrs[i]);
free(extent_bcnt_ptrs[i]);
}
- free(dup_extent_trees);
free(extent_bcnt_ptrs);
free(extent_bno_ptrs);
+ free(extent_tree_ptrs);
- dup_extent_trees = NULL;
- extent_bcnt_ptrs = NULL;
- extent_bno_ptrs = NULL;
+ extent_bcnt_ptrs = extent_bno_ptrs = extent_tree_ptrs = NULL;
+
+ return;
}
int
#include "pthread.h"
#include "avl.h"
#include "dir.h"
-#include "bmap.h"
#include "incore.h"
#include "prefetch.h"
+#include "radix-tree.h"
#include <sys/resource.h>
-/* TODO: dirbuf/freemap key usage is completely b0rked - only used for dirv1 */
static pthread_key_t dirbuf_key;
static pthread_key_t dir_freemap_key;
static pthread_key_t attr_freemap_key;
-extern pthread_key_t dblkmap_key;
-extern pthread_key_t ablkmap_key;
-
static void
ts_alloc(pthread_key_t key, unsigned n, size_t size)
{
void *voidp;
- voidp = calloc(n, size);
+ voidp = malloc((n)*(size));
if (voidp == NULL) {
do_error(_("ts_alloc: cannot allocate thread specific storage\n"));
/* NO RETURN */
pthread_key_create(&dirbuf_key, NULL);
pthread_key_create(&dir_freemap_key, NULL);
pthread_key_create(&attr_freemap_key, NULL);
-
- pthread_key_create(&dblkmap_key, NULL);
- pthread_key_create(&ablkmap_key, NULL);
}
void
ts_create();
ts_init();
increase_rlimit();
- pftrace_init();
+ radix_tree_init();
}
phase2(xfs_mount_t *mp)
{
xfs_agnumber_t i;
+ xfs_agblock_t b;
int j;
ino_tree_node_t *ino_rec;
do_log(_(" - scan filesystem freespace and inode maps...\n"));
+ /*
+ * account for space used by ag headers and log if internal
+ */
+ set_bmap_log(mp);
+ set_bmap_fs(mp);
+
bad_ino_btree = 0;
set_progress_msg(PROG_FMT_SCAN_AG, (__uint64_t) glob_agcount);
/*
* also mark blocks
*/
- set_bmap_ext(0, XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
- mp->m_ialloc_blks, XR_E_INO);
+ for (b = 0; b < mp->m_ialloc_blks; b++) {
+ set_agbno_state(mp, 0,
+ b + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
+ XR_E_INO);
+ }
} else {
do_log(_(" - found root inode chunk\n"));
agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
pthread_mutex_lock(&ag_locks[agno]);
- state = get_bmap(agno, agbno);
- switch (state) {
+ switch (state = get_agbno_state(mp,
+ agno, agbno)) {
+ case XR_E_UNKNOWN:
+ case XR_E_FREE:
+ case XR_E_FREE1:
+ set_agbno_state(mp, agno, agbno,
+ XR_E_INO);
+ break;
case XR_E_BAD_STATE:
do_error(_(
"bad state in block map %d\n"),
* anyway, hopefully without
* losing too much other data
*/
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno,
+ XR_E_INO);
break;
}
pthread_mutex_unlock(&ag_locks[agno]);
xfs_agnumber_t i;
xfs_agblock_t j;
xfs_agblock_t ag_end;
- xfs_extlen_t blen;
+ xfs_agblock_t extent_start;
+ xfs_extlen_t extent_len;
int ag_hdr_len = 4 * mp->m_sb.sb_sectsize;
int ag_hdr_block;
int bstate;
ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
mp->m_sb.sb_dblocks -
(xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
-
+ extent_start = extent_len = 0;
/*
* set up duplicate extent list for this ag
*/
- for (j = ag_hdr_block; j < ag_end; j += blen) {
- bstate = get_bmap_ext(i, j, ag_end, &blen);
- switch (bstate) {
+ for (j = ag_hdr_block; j < ag_end; j++) {
+
+ /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
+ if ((extent_start == 0) && ((j & XR_BB_MASK) == 0)) {
+ switch(ba_bmap[i][j>>XR_BB]) {
+ case XR_E_UNKNOWN_LL:
+ case XR_E_FREE1_LL:
+ case XR_E_FREE_LL:
+ case XR_E_INUSE_LL:
+ case XR_E_INUSE_FS_LL:
+ case XR_E_INO_LL:
+ case XR_E_FS_MAP_LL:
+ j += (XR_BB_UNIT/XR_BB) - 1;
+ continue;
+ }
+ }
+
+ bstate = get_agbno_state(mp, i, j);
+
+ switch (bstate) {
case XR_E_BAD_STATE:
default:
do_warn(
case XR_E_INUSE_FS:
case XR_E_INO:
case XR_E_FS_MAP:
+ if (extent_start == 0)
+ continue;
+ else {
+ /*
+ * add extent and reset extent state
+ */
+ add_dup_extent(i, extent_start,
+ extent_len);
+ extent_start = 0;
+ extent_len = 0;
+ }
break;
case XR_E_MULT:
- add_dup_extent(i, j, blen);
+ if (extent_start == 0) {
+ extent_start = j;
+ extent_len = 1;
+ } else if (extent_len == MAXEXTLEN) {
+ add_dup_extent(i, extent_start,
+ extent_len);
+ extent_start = j;
+ extent_len = 1;
+ } else
+ extent_len++;
break;
}
}
-
+ /*
+ * catch tail-case, extent hitting the end of the ag
+ */
+ if (extent_start != 0)
+ add_dup_extent(i, extent_start, extent_len);
PROG_RPT_INC(prog_rpt_done[i], 1);
}
print_final_rpt();
rt_len = 0;
for (bno = 0; bno < mp->m_sb.sb_rextents; bno++) {
- bstate = get_rtbmap(bno);
+
+ bstate = get_rtbno_state(mp, bno);
+
switch (bstate) {
case XR_E_BAD_STATE:
default:
/*
* initialize bitmaps for all AGs
*/
- reset_bmaps(mp);
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ /*
+ * now reset the bitmap for all ags
+ */
+ memset(ba_bmap[i], 0,
+ roundup((mp->m_sb.sb_agblocks+(NBBY/XR_BB)-1)/(NBBY/XR_BB),
+ sizeof(__uint64_t)));
+ for (j = 0; j < ag_hdr_block; j++)
+ set_agbno_state(mp, i, j, XR_E_INUSE_FS);
+ }
+ set_bmap_rt(mp->m_sb.sb_rextents);
+ set_bmap_log(mp);
+ set_bmap_fs(mp);
do_log(_(" - check for inodes claiming duplicate blocks...\n"));
set_progress_msg(PROG_FMT_DUP_BLOCKS, (__uint64_t) mp->m_sb.sb_icount);
xfs_agblock_t agbno;
xfs_agblock_t ag_end;
uint free_blocks;
- xfs_extlen_t blen;
- int bstate;
+#ifdef XR_BLD_FREE_TRACE
+ int old_state;
+ int state = XR_E_BAD_STATE;
+#endif
/*
* scan the bitmap for the ag looking for continuous
* ok, now find the number of extents, keep track of the
* largest extent.
*/
- for (agbno = 0; agbno < ag_end; agbno += blen) {
- bstate = get_bmap_ext(agno, agbno, ag_end, &blen);
- if (bstate < XR_E_INUSE) {
- free_blocks += blen;
+ for (agbno = 0; agbno < ag_end; agbno++) {
+#if 0
+ old_state = state;
+ state = get_agbno_state(mp, agno, agbno);
+ if (state != old_state) {
+ fprintf(stderr, "agbno %u - new state is %d\n",
+ agbno, state);
+ }
+#endif
+ /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
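+ /*
+ * If the whole 64-bit word is one of the in-use patterns below,
+ * none of its 16 records can begin a free extent, so the word is
+ * skipped in one step.
+ */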
+ if ((in_extent == 0) && ((agbno & XR_BB_MASK) == 0)) {
+ /* testing >= XR_E_INUSE */
+ switch (ba_bmap[agno][agbno>>XR_BB]) {
+ case XR_E_INUSE_LL:
+ case XR_E_INUSE_FS_LL:
+ case XR_E_INO_LL:
+ case XR_E_FS_MAP_LL:
+ agbno += (XR_BB_UNIT/XR_BB) - 1;
+ continue;
+ }
+
+ }
+ if (get_agbno_state(mp, agno, agbno) < XR_E_INUSE) {
+ free_blocks++;
if (in_extent == 0) {
/*
* found the start of a free extent
in_extent = 1;
num_extents++;
extent_start = agbno;
- extent_len = blen;
+ extent_len = 1;
} else {
- extent_len += blen;
+ extent_len++;
}
} else {
if (in_extent) {
agno);
}
+ /*
+ * done with the AG bitmap, toss it...
+ */
+ teardown_ag_bmap(mp, agno);
+
/*
* ok, now set up the btree cursors for the
 * on-disk btrees (includes pre-allocating all
_(" - generate realtime summary info and bitmap...\n"));
rtinit(mp);
generate_rtinfo(mp, btmcompute, sumcompute);
+ teardown_rt_bmap(mp);
}
do_log(_(" - reset superblock...\n"));
do_log(_("Phase 6 - check inode connectivity...\n"));
+ if (!no_modify)
+ teardown_bmap_finish(mp);
+ else
+ teardown_bmap(mp);
+
incore_ext_teardown(mp);
add_ino_ex_data(mp);
#include <libxfs.h>
#include <pthread.h>
#include "avl.h"
-#include "btree.h"
#include "globals.h"
#include "agheader.h"
#include "incore.h"
#include "threads.h"
#include "prefetch.h"
#include "progress.h"
+#include "radix-tree.h"
int do_prefetch = 1;
prefetch_args_t *args)
{
if (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
pftrace("signalling processing for AG %d", args->agno);
-
+#endif
args->can_start_processing = 1;
pthread_cond_signal(&args->start_processing);
}
prefetch_args_t *args)
{
if (!args->can_start_reading) {
+#ifdef XR_PF_TRACE
pftrace("signalling reading for AG %d", args->agno);
-
+#endif
args->can_start_reading = 1;
pthread_cond_broadcast(&args->start_reading);
}
pthread_mutex_lock(&args->lock);
- btree_insert(args->io_queue, fsbno, bp);
-
if (fsbno > args->last_bno_read) {
- if (B_IS_INODE(flag)) {
+ radix_tree_insert(&args->primary_io_queue, fsbno, bp);
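+ /*
+ * Tag 0 marks non-inode (metadata) buffers so a PF_META_ONLY pass
+ * in pf_batch_read() can find them with radix_tree_gang_lookup_tag().
+ */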
+ if (!B_IS_INODE(flag))
+ radix_tree_tag_set(&args->primary_io_queue, fsbno, 0);
+ else {
args->inode_bufs_queued++;
if (args->inode_bufs_queued == IO_THRESHOLD)
pf_start_io_workers(args);
}
+#ifdef XR_PF_TRACE
+ pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+ "primary queue (inode_bufs_queued = %d, last_bno = %lu)",
+ B_IS_INODE(flag) ? 'I' : 'M', bp,
+ (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+ args->inode_bufs_queued, args->last_bno_read);
+#endif
} else {
+#ifdef XR_PF_TRACE
+ pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+ "secondary queue (last_bno = %lu)",
+ B_IS_INODE(flag) ? 'I' : 'M', bp,
+ (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+ args->last_bno_read);
+#endif
ASSERT(!B_IS_INODE(flag));
XFS_BUF_SET_PRIORITY(bp, B_DIR_META_2);
+ radix_tree_insert(&args->secondary_io_queue, fsbno, bp);
}
- pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to queue"
- "(inode_bufs_queued = %d, last_bno = %lu)", B_IS_INODE(flag) ?
- 'I' : 'M', bp, (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
- args->inode_bufs_queued, args->last_bno_read);
-
pf_start_processing(args);
pthread_mutex_unlock(&args->lock);
while (irec.br_blockcount) {
unsigned int len;
-
+#ifdef XR_PF_TRACE
pftrace("queuing dir extent in AG %d", args->agno);
-
+#endif
len = (irec.br_blockcount > mp->m_dirblkfsbs) ?
mp->m_dirblkfsbs : irec.br_blockcount;
pf_queue_io(args, irec.br_startblock, len, B_DIR_META);
pf_which_t which,
void *buf)
{
+ struct radix_tree_root *queue;
xfs_buf_t *bplist[MAX_BUFS];
unsigned int num;
off64_t first_off, last_off, next_off;
int i;
int inode_bufs;
unsigned long fsbno;
- unsigned long max_fsbno;
char *pbuf;
- for (;;) {
- num = 0;
- if (which == PF_SECONDARY) {
- bplist[0] = btree_find(args->io_queue, 0, &fsbno);
- max_fsbno = MIN(fsbno + pf_max_fsbs,
- args->last_bno_read);
+ queue = (which != PF_SECONDARY) ? &args->primary_io_queue
+ : &args->secondary_io_queue;
+
+ while (radix_tree_lookup_first(queue, &fsbno) != NULL) {
+
+ if (which != PF_META_ONLY) {
+ num = radix_tree_gang_lookup_ex(queue,
+ (void**)&bplist[0], fsbno,
+ fsbno + pf_max_fsbs, MAX_BUFS);
+ ASSERT(num > 0);
+ ASSERT(XFS_FSB_TO_DADDR(mp, fsbno) ==
+ XFS_BUF_ADDR(bplist[0]));
} else {
- bplist[0] = btree_find(args->io_queue,
- args->last_bno_read, &fsbno);
- max_fsbno = fsbno + pf_max_fsbs;
- }
- while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) {
- if (which != PF_META_ONLY ||
- !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
- num++;
- bplist[num] = btree_lookup_next(args->io_queue, &fsbno);
+ num = radix_tree_gang_lookup_tag(queue,
+ (void**)&bplist[0], fsbno,
+ MAX_BUFS / 4, 0);
+ if (num == 0)
+ return;
}
- if (!num)
- return;
/*
* do a big read if 25% of the potential buffer is useful,
*/
first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0]));
last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
- XFS_BUF_SIZE(bplist[num-1]);
+ XFS_BUF_SIZE(bplist[num-1]);
while (last_off - first_off > pf_max_bytes) {
num--;
- last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(
- bplist[num-1])) + XFS_BUF_SIZE(bplist[num-1]);
+ last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
+ XFS_BUF_SIZE(bplist[num-1]);
}
- if (num < ((last_off - first_off) >>
- (mp->m_sb.sb_blocklog + 3))) {
+ if (num < ((last_off - first_off) >> (mp->m_sb.sb_blocklog + 3))) {
/*
* not enough blocks for one big read, so determine
* the number of blocks that are close enough.
*/
last_off = first_off + XFS_BUF_SIZE(bplist[0]);
for (i = 1; i < num; i++) {
- next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(
- bplist[i])) + XFS_BUF_SIZE(bplist[i]);
+ next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) +
+ XFS_BUF_SIZE(bplist[i]);
if (next_off - last_off > pf_batch_bytes)
break;
last_off = next_off;
}
for (i = 0; i < num; i++) {
- if (btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp,
+ if (radix_tree_delete(queue, XFS_DADDR_TO_FSB(mp,
XFS_BUF_ADDR(bplist[i]))) == NULL)
do_error(_("prefetch corruption\n"));
}
}
}
for (i = 0; i < num; i++) {
+#ifdef XR_PF_TRACE
pftrace("putbuf %c %p (%llu) in AG %d",
B_IS_INODE(XFS_BUF_PRIORITY(bplist[i])) ? 'I' : 'M',
bplist[i], (long long)XFS_BUF_ADDR(bplist[i]),
args->agno);
+#endif
libxfs_putbuf(bplist[i]);
}
pthread_mutex_lock(&args->lock);
if (which != PF_SECONDARY) {
+#ifdef XR_PF_TRACE
pftrace("inode_bufs_queued for AG %d = %d", args->agno,
args->inode_bufs_queued);
+#endif
/*
* if primary inode queue running low, process metadata
 * in both queues to avoid I/O starvation as the
*/
if (which == PF_PRIMARY && !args->queuing_done &&
args->inode_bufs_queued < IO_THRESHOLD) {
+#ifdef XR_PF_TRACE
pftrace("reading metadata bufs from primary queue for AG %d",
args->agno);
-
+#endif
pf_batch_read(args, PF_META_ONLY, buf);
-
+#ifdef XR_PF_TRACE
pftrace("reading bufs from secondary queue for AG %d",
args->agno);
-
+#endif
pf_batch_read(args, PF_SECONDARY, buf);
}
}
return NULL;
pthread_mutex_lock(&args->lock);
- while (!args->queuing_done || btree_find(args->io_queue, 0, NULL)) {
+ while (!args->queuing_done || args->primary_io_queue.height) {
+#ifdef XR_PF_TRACE
pftrace("waiting to start prefetch I/O for AG %d", args->agno);
-
+#endif
while (!args->can_start_reading && !args->queuing_done)
pthread_cond_wait(&args->start_reading, &args->lock);
-
+#ifdef XR_PF_TRACE
pftrace("starting prefetch I/O for AG %d", args->agno);
-
+#endif
pf_batch_read(args, PF_PRIMARY, buf);
pf_batch_read(args, PF_SECONDARY, buf);
+#ifdef XR_PF_TRACE
pftrace("ran out of bufs to prefetch for AG %d", args->agno);
-
+#endif
if (!args->queuing_done)
args->can_start_reading = 0;
}
free(buf);
+#ifdef XR_PF_TRACE
pftrace("finished prefetch I/O for AG %d", args->agno);
-
+#endif
return NULL;
}
break;
}
}
+
+#ifdef XR_PF_TRACE
pftrace("starting prefetch for AG %d", args->agno);
+#endif
for (irec = findfirst_inode_rec(args->agno); irec != NULL;
irec = next_ino_rec(irec)) {
pthread_mutex_lock(&args->lock);
+#ifdef XR_PF_TRACE
pftrace("finished queuing inodes for AG %d (inode_bufs_queued = %d)",
args->agno, args->inode_bufs_queued);
-
+#endif
args->queuing_done = 1;
pf_start_io_workers(args);
pf_start_processing(args);
if (args->io_threads[i])
pthread_join(args->io_threads[i], NULL);
+#ifdef XR_PF_TRACE
pftrace("prefetch for AG %d finished", args->agno);
-
+#endif
pthread_mutex_lock(&args->lock);
- ASSERT(btree_find(args->io_queue, 0, NULL) == NULL);
+ ASSERT(args->primary_io_queue.height == 0);
+ ASSERT(args->secondary_io_queue.height == 0);
args->prefetch_done = 1;
if (args->next_args)
{
int err;
+#ifdef XR_PF_TRACE
pftrace("creating queue thread for AG %d", args->agno);
-
+#endif
err = pthread_create(&args->queuing_thread, NULL,
pf_queuing_worker, args);
if (err != 0) {
args = calloc(1, sizeof(prefetch_args_t));
- btree_init(&args->io_queue);
+ INIT_RADIX_TREE(&args->primary_io_queue, 0);
+ INIT_RADIX_TREE(&args->secondary_io_queue, 0);
if (pthread_mutex_init(&args->lock, NULL) != 0)
do_error(_("failed to initialize prefetch mutex\n"));
if (pthread_cond_init(&args->start_reading, NULL) != 0)
pthread_mutex_lock(&args->lock);
while (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
pftrace("waiting to start processing AG %d", args->agno);
-
+#endif
pthread_cond_wait(&args->start_processing, &args->lock);
}
+#ifdef XR_PF_TRACE
pftrace("can start processing AG %d", args->agno);
-
+#endif
pthread_mutex_unlock(&args->lock);
}
if (args == NULL)
return;
+#ifdef XR_PF_TRACE
pftrace("waiting AG %d prefetch to finish", args->agno);
-
+#endif
if (args->queuing_thread)
pthread_join(args->queuing_thread, NULL);
+#ifdef XR_PF_TRACE
pftrace("AG %d prefetch done", args->agno);
-
+#endif
pthread_mutex_destroy(&args->lock);
pthread_cond_destroy(&args->start_reading);
pthread_cond_destroy(&args->start_processing);
sem_destroy(&args->ra_count);
- btree_destroy(args->io_queue);
free(args);
}
#ifdef XR_PF_TRACE
-static FILE *pf_trace_file;
-
-void
-pftrace_init(void)
-{
- pf_trace_file = fopen("/tmp/xfs_repair_prefetch.trace", "w");
- setvbuf(pf_trace_file, NULL, _IOLBF, 1024);
-}
-
-void
-pftrace_done(void)
-{
- fclose(pf_trace_file);
-}
-
void
_pftrace(const char *func, const char *msg, ...)
{
buf[sizeof(buf)-1] = '\0';
va_end(args);
- fprintf(pf_trace_file, "%lu.%06lu %s: %s\n", tv.tv_sec, tv.tv_usec,
- func, buf);
+ fprintf(pf_trace_file, "%lu.%06lu %s: %s\n", tv.tv_sec, tv.tv_usec, func, buf);
}
#endif
#include <semaphore.h>
#include "incore.h"
+#include "radix-tree.h"
extern int do_prefetch;
pthread_mutex_t lock;
pthread_t queuing_thread;
pthread_t io_threads[PF_THREAD_COUNT];
- struct btree_root *io_queue;
+ struct radix_tree_root primary_io_queue;
+ struct radix_tree_root secondary_io_queue;
pthread_cond_t start_reading;
pthread_cond_t start_processing;
int agno;
#ifdef XR_PF_TRACE
-void pftrace_init(void);
-void pftrace_done(void);
-
#define pftrace(msg...) _pftrace(__FUNCTION__, ## msg)
void _pftrace(const char *, const char *, ...);
-#else
-static inline void pftrace_init(void) { };
-static inline void pftrace_done(void) { };
-static inline void pftrace(const char *msg, ...) { };
#endif
#endif /* _XFS_REPAIR_PREFETCH_H */
--- /dev/null
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <libxfs.h>
+#include "radix-tree.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define RADIX_TREE_MAP_SHIFT 6
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
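+/* i.e. each interior node fans out to 64 (1UL << 6) child slots */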
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#endif
+
+struct radix_tree_node {
+ unsigned int count;
+ void *slots[RADIX_TREE_MAP_SIZE];
+#ifdef RADIX_TREE_TAGS
+ unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
+#endif
+};
+
+struct radix_tree_path {
+ struct radix_tree_node *node;
+ int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH];
+
+/*
+ * Radix tree node cache.
+ */
+
+#define radix_tree_node_alloc(r) ((struct radix_tree_node *) \
+ calloc(1, sizeof(struct radix_tree_node)))
+#define radix_tree_node_free(n) free(n)
+
+#ifdef RADIX_TREE_TAGS
+
+static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ *((__uint32_t *)node->tags[tag] + (offset >> 5)) |= (1 << (offset & 31));
+}
+
+static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __uint32_t *p = (__uint32_t*)node->tags[tag] + (offset >> 5);
+ __uint32_t m = 1 << (offset & 31);
+ *p &= ~m;
+}
+
+static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ return 1 & (((const __uint32_t *)node->tags[tag])[offset >> 5] >> (offset & 31));
+}
+
+/*
+ * Returns 1 if any slot in the node has this tag set.
+ * Otherwise returns 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+ int idx;
+ for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+ if (node->tags[tag][idx])
+ return 1;
+ }
+ return 0;
+}
+
+#endif
+
+/*
+ * Return the maximum key which can be stored in a
+ * radix tree with height HEIGHT.
+ */
+static inline unsigned long radix_tree_maxindex(unsigned int height)
+{
+ return height_to_maxindex[height];
+}
+
+/*
+ * Extend a radix tree so it can store key @index.
+ */
+static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_node *node;
+ unsigned int height;
+#ifdef RADIX_TREE_TAGS
+ char tags[RADIX_TREE_MAX_TAGS];
+ int tag;
+#endif
+
+ /* Figure out what the height should be. */
+ height = root->height + 1;
+ while (index > radix_tree_maxindex(height))
+ height++;
+
+ if (root->rnode == NULL) {
+ root->height = height;
+ goto out;
+ }
+
+#ifdef RADIX_TREE_TAGS
+ /*
+ * Prepare the tag status of the top-level node for propagation
+ * into the newly-pushed top-level node(s)
+ */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ tags[tag] = 0;
+ if (any_tag_set(root->rnode, tag))
+ tags[tag] = 1;
+ }
+#endif
+ do {
+ if (!(node = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+
+ /* Increase the height. */
+ node->slots[0] = root->rnode;
+
+#ifdef RADIX_TREE_TAGS
+ /* Propagate the aggregated tag info into the new root */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tags[tag])
+ tag_set(node, tag, 0);
+ }
+#endif
+ node->count = 1;
+ root->rnode = node;
+ root->height++;
+ } while (height > root->height);
+out:
+ return 0;
+}
+
+/**
+ * radix_tree_insert - insert into a radix tree
+ * @root: radix tree root
+ * @index: index key
+ * @item: item to insert
+ *
+ * Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ struct radix_tree_node *node = NULL, *slot;
+ unsigned int height, shift;
+ int offset;
+ int error;
+
+ /* Make sure the tree is high enough. */
+ if ((!index && !root->rnode) ||
+ index > radix_tree_maxindex(root->height)) {
+ error = radix_tree_extend(root, index);
+ if (error)
+ return error;
+ }
+
+ slot = root->rnode;
+ height = root->height;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ offset = 0; /* uninitialised var warning */
+ do {
+ if (slot == NULL) {
+ /* Have to add a child node. */
+ if (!(slot = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+ if (node) {
+ node->slots[offset] = slot;
+ node->count++;
+ } else
+ root->rnode = slot;
+ }
+
+ /* Go a level down */
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ node = slot;
+ slot = node->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ } while (height > 0);
+
+ if (slot != NULL)
+ return -EEXIST;
+
+ ASSERT(node);
+ node->count++;
+ node->slots[offset] = item;
+#ifdef RADIX_TREE_TAGS
+ ASSERT(!tag_get(node, 0, offset));
+ ASSERT(!tag_get(node, 1, offset));
+#endif
+ return 0;
+}
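+
+/*
+ * Minimal usage sketch (illustrative only, mirroring how the prefetch
+ * queues drive the tree):
+ *
+ *	struct radix_tree_root queue;
+ *
+ *	INIT_RADIX_TREE(&queue, 0);
+ *	if (radix_tree_insert(&queue, fsbno, bp) == -EEXIST)
+ *		... index already present ...
+ *	bp = radix_tree_lookup(&queue, fsbno);
+ */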
+
+static inline void **__lookup_slot(struct radix_tree_root *root,
+ unsigned long index)
+{
+ unsigned int height, shift;
+ struct radix_tree_node **slot;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return NULL;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = &root->rnode;
+
+ while (height > 0) {
+ if (*slot == NULL)
+ return NULL;
+
+ slot = (struct radix_tree_node **)
+ ((*slot)->slots +
+ ((index >> shift) & RADIX_TREE_MAP_MASK));
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ return (void **)slot;
+}
+
+/**
+ * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the slot corresponding to the position @index in the radix tree
+ * @root. This is useful for update-if-exists operations.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+ return __lookup_slot(root, index);
+}
+
+/**
+ * radix_tree_lookup - perform lookup operation on a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ void **slot;
+
+ slot = __lookup_slot(root, index);
+ return slot != NULL ? *slot : NULL;
+}
+
+/**
+ * radix_tree_lookup_first - find the first index key in the radix tree
+ * @root: radix tree root
+ * @index: where the first index will be placed
+ *
+ * Returns the first entry and index key in the radix tree @root.
+ */
+void *radix_tree_lookup_first(struct radix_tree_root *root, unsigned long *index)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+ unsigned long i;
+
+ height = root->height;
+ *index = 0;
+ if (height == 0)
+ return NULL;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for (; height > 1; height--) {
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL)
+ break;
+ }
+ ASSERT(i < RADIX_TREE_MAP_SIZE);
+
+ *index |= (i << shift);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL) {
+ *index |= i;
+ return slot->slots[i];
+ }
+ }
+ return NULL;
+}
+
+#ifdef RADIX_TREE_TAGS
+
+/**
+ * radix_tree_tag_set - set a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree. From
+ * the root all the way down to the leaf node.
+ *
+ * Returns the address of the tagged item. Setting a tag on a not-present
+ * item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return NULL;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ while (height > 0) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!tag_get(slot, tag, offset))
+ tag_set(slot, tag, offset);
+ slot = slot->slots[offset];
+ ASSERT(slot != NULL);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ return slot;
+}
+
+/**
+ * radix_tree_tag_clear - clear a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree. If
+ * this causes the leaf node to have no tags set then clear the tag in the
+ * next-to-leaf node, etc.
+ *
+ * Returns the address of the tagged item on success, else NULL. ie:
+ * has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_node *slot;
+ unsigned int height, shift;
+ void *ret = NULL;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+ slot = root->rnode;
+
+ while (height > 0) {
+ int offset;
+
+ if (slot == NULL)
+ goto out;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp[1].offset = offset;
+ pathp[1].node = slot;
+ slot = slot->slots[offset];
+ pathp++;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ ret = slot;
+ if (ret == NULL)
+ goto out;
+
+ do {
+ if (!tag_get(pathp->node, tag, pathp->offset))
+ goto out;
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag))
+ goto out;
+ pathp--;
+ } while (pathp->node);
+out:
+ return ret;
+}
+
+#endif
+
+static unsigned int
+__lookup(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift, height;
+ struct radix_tree_node *slot;
+ unsigned long i;
+
+ height = root->height;
+ if (height == 0)
+ goto out;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for ( ; height > 1; height--) {
+
+ for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
+ i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL)
+ break;
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+
+ /* Bottom level: grab some items */
+ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+ index++;
+ if (slot->slots[i]) {
+ results[nr_found++] = slot->slots[i];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items. Places
+ * them at *@results and returns the number of items which were placed at
+ * *@results.
+ *
+ * The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup(root, results + ret, cur_index,
+ max_items - ret, &next_index);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
+
+/**
+ * radix_tree_gang_lookup_ex - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @last_index: don't lookup past this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items, starting
+ * at @first_index and not going past @last_index, up to @max_items.  Places
+ * them at *@results and returns the number of items which were placed
+ * at *@results.
+ *
+ * The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned long last_index,
+ unsigned int max_items)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items && cur_index < last_index) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup(root, results + ret, cur_index,
+ max_items - ret, &next_index);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
+
+#ifdef RADIX_TREE_TAGS
+
+static unsigned int
+__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index, unsigned int tag)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift;
+ unsigned int height = root->height;
+ struct radix_tree_node *slot;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ while (height > 0) {
+ unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (tag_get(slot, tag, i)) {
+ ASSERT(slot->slots[i] != NULL);
+ break;
+ }
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+ height--;
+ if (height == 0) { /* Bottom level: grab some items */
+ unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+ for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+ index++;
+ if (tag_get(slot, tag, j)) {
+ ASSERT(slot->slots[j] != NULL);
+ results[nr_found++] = slot->slots[j];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+ }
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
+ * based on a tag
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Performs an index-ascending scan of the tree for present items which
+ * have the tag indexed by @tag set. Places the items at *@results and
+ * returns the number of items which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup_tag(root, results + ret, cur_index,
+ max_items - ret, &next_index, tag);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
+
+#endif
+
+/**
+ * radix_tree_shrink - shrink height of a radix tree to minimal
+ * @root:	radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+ /* try to shrink tree height */
+ while (root->height > 1 &&
+ root->rnode->count == 1 &&
+ root->rnode->slots[0]) {
+ struct radix_tree_node *to_free = root->rnode;
+
+ root->rnode = to_free->slots[0];
+ root->height--;
+ /* must only free zeroed nodes into the slab */
+#ifdef RADIX_TREE_TAGS
+ tag_clear(to_free, 0, 0);
+ tag_clear(to_free, 1, 0);
+#endif
+ to_free->slots[0] = NULL;
+ to_free->count = 0;
+ radix_tree_node_free(to_free);
+ }
+}
+
+/**
+ * radix_tree_delete - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Remove the item at @index from the radix tree rooted at @root.
+ *
+ * Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_path *orig_pathp;
+ struct radix_tree_node *slot;
+ unsigned int height, shift;
+ void *ret = NULL;
+#ifdef RADIX_TREE_TAGS
+ char tags[RADIX_TREE_MAX_TAGS];
+ int nr_cleared_tags;
+ int tag;
+#endif
+ int offset;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+ slot = root->rnode;
+
+ for ( ; height > 0; height--) {
+ if (slot == NULL)
+ goto out;
+
+ pathp++;
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp->offset = offset;
+ pathp->node = slot;
+ slot = slot->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ }
+
+ ret = slot;
+ if (ret == NULL)
+ goto out;
+
+ orig_pathp = pathp;
+
+#ifdef RADIX_TREE_TAGS
+ /*
+ * Clear all tags associated with the just-deleted item
+ */
+ nr_cleared_tags = 0;
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ tags[tag] = 1;
+ if (tag_get(pathp->node, tag, pathp->offset)) {
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (!any_tag_set(pathp->node, tag)) {
+ tags[tag] = 0;
+ nr_cleared_tags++;
+ }
+ }
+ }
+
+ for (pathp--; nr_cleared_tags && pathp->node; pathp--) {
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tags[tag])
+ continue;
+
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag)) {
+ tags[tag] = 1;
+ nr_cleared_tags--;
+ }
+ }
+ }
+#endif
+ /* Now free the nodes we do not need anymore */
+ for (pathp = orig_pathp; pathp->node; pathp--) {
+ pathp->node->slots[pathp->offset] = NULL;
+ pathp->node->count--;
+
+ if (pathp->node->count) {
+ if (pathp->node == root->rnode)
+ radix_tree_shrink(root);
+ goto out;
+ }
+
+ /* Node with zero slots in use so free it */
+ radix_tree_node_free(pathp->node);
+ }
+ root->rnode = NULL;
+ root->height = 0;
+out:
+ return ret;
+}
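+
+/*
+ * Minimal usage sketch: an insert/lookup/delete round trip.  "item" is a
+ * placeholder for whatever pointer the caller stores; radix_tree_init()
+ * is assumed to have been called once at start-up.
+ *
+ *	struct radix_tree_root tree;
+ *	void *found, *gone;
+ *
+ *	INIT_RADIX_TREE(&tree, 0);
+ *	if (radix_tree_insert(&tree, 42, item) == 0) {
+ *		found = radix_tree_lookup(&tree, 42);	- returns item
+ *		gone = radix_tree_delete(&tree, 42);	- returns item, entry removed
+ *	}
+ */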
+
+#ifdef RADIX_TREE_TAGS
+/**
+ * radix_tree_tagged - test whether any items in the tree are tagged
+ * @root: radix tree root
+ * @tag: tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+{
+ struct radix_tree_node *rnode;
+ rnode = root->rnode;
+ if (!rnode)
+ return 0;
+ return any_tag_set(rnode, tag);
+}
+#endif
+
+static unsigned long __maxindex(unsigned int height)
+{
+ unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
+ unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
+
+ if (tmp >= RADIX_TREE_INDEX_BITS)
+ index = ~0UL;
+ return index;
+}
+
+static void radix_tree_init_maxindex(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+ height_to_maxindex[i] = __maxindex(i);
+}
+
+void radix_tree_init(void)
+{
+ radix_tree_init_maxindex();
+}
--- /dev/null
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __XFS_SUPPORT_RADIX_TREE_H__
+#define __XFS_SUPPORT_RADIX_TREE_H__
+
+#define RADIX_TREE_TAGS
+
+struct radix_tree_root {
+ unsigned int height;
+ struct radix_tree_node *rnode;
+};
+
+#define RADIX_TREE_INIT(mask) { \
+ .height = 0, \
+ .rnode = NULL, \
+}
+
+#define RADIX_TREE(name, mask) \
+ struct radix_tree_root name = RADIX_TREE_INIT(mask)
+
+#define INIT_RADIX_TREE(root, mask) \
+do { \
+ (root)->height = 0; \
+ (root)->rnode = NULL; \
+} while (0)
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_MAX_TAGS 2
+#endif
+
+int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup_first(struct radix_tree_root *, unsigned long *);
+void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items);
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned long last_index,
+ unsigned int max_items);
+
+void radix_tree_init(void);
+
+#ifdef RADIX_TREE_TAGS
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+int radix_tree_tag_get(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag);
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+#endif
+
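+/*
+ * Minimal usage sketch for the tag interface: mark an entry that is
+ * already present in the tree, test whether any entry carries the tag,
+ * then clear the mark again.  Tag 0 and the index variable are only
+ * example values; any tag below RADIX_TREE_MAX_TAGS is valid.
+ *
+ *	radix_tree_tag_set(&tree, index, 0);
+ *	if (radix_tree_tagged(&tree, 0))
+ *		... at least one entry in the tree carries tag 0 ...
+ *	radix_tree_tag_clear(&tree, index, 0);
+ */
+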
+#endif /* __XFS_SUPPORT_RADIX_TREE_H__ */
bits = 0;
for (i = 0; i < sizeof(xfs_rtword_t) * NBBY &&
extno < mp->m_sb.sb_rextents; i++, extno++) {
- if (get_rtbmap(extno) == XR_E_FREE) {
+ if (get_rtbno_state(mp, extno) == XR_E_FREE) {
sb_frextents++;
bits |= freebit;
bit < bitsperblock && extno < mp->m_sb.sb_rextents;
bit++, extno++) {
if (xfs_isset(words, bit)) {
- set_rtbmap(extno, XR_E_FREE);
+ set_rtbno_state(mp, extno, XR_E_FREE);
sb_frextents++;
if (prevbit == 0) {
start_bmbno = bmbno;
xfs_dfiloff_t last_key;
char *forkname;
int numrecs;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- int state;
if (whichfork == XFS_DATA_FORK)
forkname = _("data");
bm_cursor->level[level].right_fsbno =
be64_to_cpu(block->bb_u.l.bb_rightsib);
- agno = XFS_FSB_TO_AGNO(mp, bno);
- agbno = XFS_FSB_TO_AGBNO(mp, bno);
-
- pthread_mutex_lock(&ag_locks[agno]);
- state = get_bmap(agno, agbno);
- switch (state) {
+ switch (get_fsbno_state(mp, bno)) {
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
- set_bmap(agno, agbno, XR_E_INUSE);
+ set_fsbno_state(mp, bno, XR_E_INUSE);
break;
case XR_E_FS_MAP:
case XR_E_INUSE:
* we made it here, the block probably
* contains btree data.
*/
- set_bmap(agno, agbno, XR_E_MULT);
+ set_fsbno_state(mp, bno, XR_E_MULT);
do_warn(
_("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
- ino, (__uint64_t) bno, state);
+ ino, (__uint64_t) bno,
+ get_fsbno_state(mp, bno));
break;
case XR_E_MULT:
case XR_E_INUSE_FS:
- set_bmap(agno, agbno, XR_E_MULT);
+ set_fsbno_state(mp, bno, XR_E_MULT);
do_warn(
_("inode 0x%llx bmap block 0x%llx claimed, state is %d\n"),
- ino, (__uint64_t) bno, state);
+ ino, (__uint64_t) bno,
+ get_fsbno_state(mp, bno));
/*
* if we made it to here, this is probably a bmap block
* that is being used by *another* file as a bmap block
default:
do_warn(
_("bad state %d, inode 0x%llx bmap block 0x%llx\n"),
- state, ino, (__uint64_t) bno);
+ get_fsbno_state(mp, bno),
+ ino, (__uint64_t) bno);
break;
}
- pthread_mutex_unlock(&ag_locks[agno]);
} else {
/*
* attribute fork for realtime files is in the regular
* filesystem
*/
if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK) {
- if (search_dup_extent(XFS_FSB_TO_AGNO(mp, bno),
- XFS_FSB_TO_AGBNO(mp, bno),
- XFS_FSB_TO_AGBNO(mp, bno) + 1))
+ if (search_dup_extent(mp, XFS_FSB_TO_AGNO(mp, bno),
+ XFS_FSB_TO_AGBNO(mp, bno)))
return(1);
} else {
if (search_rt_dup_extent(mp, bno))
/*
* check for btree blocks multiply claimed
*/
- state = get_bmap(agno, bno);
-	if (state != XR_E_UNKNOWN)  {
- set_bmap(agno, bno, XR_E_MULT);
+ state = get_agbno_state(mp, agno, bno);
+
+ switch (state) {
+ case XR_E_UNKNOWN:
+ set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
+ break;
+ default:
+ set_agbno_state(mp, agno, bno, XR_E_MULT);
do_warn(
_("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
name, state, agno, bno, suspect);
return;
}
- set_bmap(agno, bno, XR_E_FS_MAP);
numrecs = be16_to_cpu(block->bb_numrecs);
rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
for (i = 0; i < numrecs; i++) {
xfs_agblock_t b, end;
- xfs_extlen_t len, blen;
+ xfs_extlen_t len;
b = be32_to_cpu(rp[i].ar_startblock);
len = be32_to_cpu(rp[i].ar_blockcount);
if (!verify_agbno(mp, agno, end - 1))
continue;
- for ( ; b < end; b += blen) {
- state = get_bmap_ext(agno, b, end, &blen);
+ for ( ; b < end; b++) {
+ state = get_agbno_state(mp, agno, b);
switch (state) {
case XR_E_UNKNOWN:
- set_bmap(agno, b, XR_E_FREE1);
+ set_agbno_state(mp, agno, b,
+ XR_E_FREE1);
break;
case XR_E_FREE1:
/*
* FREE1 blocks later
*/
if (magic == XFS_ABTC_MAGIC) {
- set_bmap_ext(agno, b, blen,
- XR_E_FREE);
+ set_agbno_state(mp, agno, b,
+ XR_E_FREE);
break;
}
default:
do_warn(
- _("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
- agno, b, b + blen - 1,
- name, state);
+ _("block (%d,%d) multiply claimed by %s space tree, state - %d\n"),
+ agno, b, name, state);
break;
}
}
j < XFS_INODES_PER_CHUNK;
j += mp->m_sb.sb_inopblock) {
agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
-
- state = get_bmap(agno, agbno);
+ state = get_agbno_state(mp, agno, agbno);
if (state == XR_E_UNKNOWN) {
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
} else if (state == XR_E_INUSE_FS && agno == 0 &&
ino + j >= first_prealloc_ino &&
ino + j < last_prealloc_ino) {
- set_bmap(agno, agbno, XR_E_INO);
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
} else {
do_warn(
_("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
* check for btree blocks multiply claimed, any unknown/free state
* is ok in the bitmap block.
*/
- state = get_bmap(agno, bno);
+ state = get_agbno_state(mp, agno, bno);
+
switch (state) {
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
- set_bmap(agno, bno, XR_E_FS_MAP);
+ set_agbno_state(mp, agno, bno, XR_E_FS_MAP);
break;
default:
- set_bmap(agno, bno, XR_E_MULT);
+ set_agbno_state(mp, agno, bno, XR_E_MULT);
do_warn(
_("inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
state, agno, bno, suspect);
if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
- set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+ set_agbno_state(mp, agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
if (be32_to_cpu(agf->agf_flcount) == 0)
return;
for (;;) {
bno = be32_to_cpu(agfl->agfl_bno[i]);
if (verify_agbno(mp, agno, bno))
- set_bmap(agno, bno, XR_E_FREE);
+ set_agbno_state(mp, agno, bno, XR_E_FREE);
else
do_warn(_("bad agbno %u in agfl, agno %d\n"),
bno, agno);
extern void phase5(xfs_mount_t *);
extern void phase6(xfs_mount_t *);
extern void phase7(xfs_mount_t *);
+extern void incore_init(xfs_mount_t *);
#define XR_MAX_SECT_SIZE (64 * 1024)
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
+#ifdef XR_PF_TRACE
+ pf_trace_file = fopen("/tmp/xfs_repair_prefetch.trace", "w");
+ setvbuf(pf_trace_file, NULL, _IOLBF, 1024);
+#endif
+
temp_mp = &xfs_m;
setbuf(stdout, NULL);
calc_mkfs(mp);
/*
- * initialize block alloc map
+ * check sb filesystem stats and initialize in-core data structures
*/
- init_bmaps(mp);
- incore_ino_init(mp);
- incore_ext_init(mp);
-
- /* initialize random globals now that we know the fs geometry */
- inodes_per_block = mp->m_sb.sb_inopblock;
+ incore_init(mp);
if (parse_sb_version(&mp->m_sb)) {
do_warn(
}
timestamp(PHASE_END, 5, NULL);
- /*
- * Done with the block usage maps, toss them...
- */
- free_bmaps(mp);
-
if (!bad_ino_btree) {
phase6(mp);
timestamp(PHASE_END, 6, NULL);
if (verbose)
summary_report();
do_log(_("done\n"));
- pftrace_done();
-
+#ifdef XR_PF_TRACE
+ fclose(pf_trace_file);
+#endif
return (0);
}