Merge of master-melb:xfs-cmds:29147a by kenmcd.
Perform parallel sequential bulk read prefetching in xfs_repair
#ifndef __CACHE_H__
#define __CACHE_H__
+#define HASH_CACHE_RATIO 8
+
/*
* Simple, generic implementation of a cache (arbitrary data).
* Provides a hash table with a capped number of cache entries.
struct cache_node;
typedef void *cache_key_t;
+
typedef void (*cache_walk_t)(struct cache_node *);
-typedef struct cache_node * (*cache_node_alloc_t)(void);
+typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t);
typedef void (*cache_node_flush_t)(struct cache_node *);
typedef void (*cache_node_relse_t)(struct cache_node *);
typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int);
void cache_node_put(struct cache_node *);
int cache_node_purge(struct cache *, cache_key_t, struct cache_node *);
void cache_report(FILE *fp, const char *, struct cache *);
+int cache_overflowed(struct cache *);
#endif /* __CACHE_H__ */
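For orientation, a minimal client of this cache interface would look roughly
like the sketch below. The embedded-node layout follows how xfs_buf_t is cast
to struct cache_node elsewhere in this patch; the exact layout of struct
cache_operations and the helper names here are assumptions for illustration.

    #include <stdlib.h>

    struct my_node {
    	struct cache_node	mn_node;	/* must be first: the cache
    						 * code casts node pointers */
    	int			mn_key;
    };

    static unsigned int
    my_hash(cache_key_t key, unsigned int hashsize)
    {
    	return (*(unsigned int *)key) % hashsize;
    }

    /* note the new callback signature: the allocator receives the key */
    static struct cache_node *
    my_alloc(cache_key_t key)
    {
    	struct my_node	*n = calloc(1, sizeof(*n));

    	if (n)
    		n->mn_key = *(int *)key;
    	return (struct cache_node *)n;
    }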
#define LIBXFS_MOUNT_32BITINOOPT 0x0008
#define LIBXFS_MOUNT_COMPAT_ATTR 0x0010
-#define LIBXFS_IHASHSIZE(sbp) (1<<16) /* tweak based on icount? */
-#define LIBXFS_BHASHSIZE(sbp) (1<<16) /* ditto, on blocks used? */
+#define LIBXFS_IHASHSIZE(sbp) (1<<10)
+#define LIBXFS_BHASHSIZE(sbp) (1<<10)
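+/* with HASH_CACHE_RATIO (8), these defaults cap each cache at
+ * 1024 buckets x 8 = 8192 entries, unless libxfs_ihash_size or
+ * libxfs_bhash_size override them */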
extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *,
dev_t, dev_t, dev_t, int);
xfs_daddr_t b_blkno;
unsigned b_bcount;
dev_t b_dev;
+ pthread_mutex_t b_lock;
void *b_fsprivate;
void *b_fsprivate2;
void *b_fsprivate3;
char *b_addr;
+#ifdef XFS_BUF_TRACING
+ struct list_head b_lock_list;
+ const char *b_func;
+ const char *b_file;
+ int b_line;
+#endif
} xfs_buf_t;
enum xfs_buf_flags_t { /* b_flags bits */
#define XFS_BUF_FSPRIVATE3(bp,type) ((type)(bp)->b_fsprivate3)
#define XFS_BUF_SET_FSPRIVATE3(bp,val) (bp)->b_fsprivate3 = (void *)(val)
-extern xfs_buf_t *libxfs_getsb (xfs_mount_t *, int);
-extern xfs_buf_t *libxfs_readbuf (dev_t, xfs_daddr_t, int, int);
-extern int libxfs_readbufr (dev_t, xfs_daddr_t, xfs_buf_t *, int, int);
-extern int libxfs_writebuf (xfs_buf_t *, int);
-extern int libxfs_writebufr (xfs_buf_t *);
-extern int libxfs_writebuf_int (xfs_buf_t *, int);
-
/* Buffer Cache Interfaces */
+
extern struct cache *libxfs_bcache;
extern struct cache_operations libxfs_bcache_operations;
-extern void libxfs_bcache_purge (void);
-extern void libxfs_bcache_flush (void);
-extern xfs_buf_t *libxfs_getbuf (dev_t, xfs_daddr_t, int);
+
+#ifdef XFS_BUF_TRACING
+
+#define libxfs_readbuf(dev, daddr, len, flags) \
+ libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len), (flags))
+#define libxfs_writebuf(buf, flags) \
+ libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, (buf), (flags))
+#define libxfs_getbuf(dev, daddr, len) \
+ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len))
+#define libxfs_putbuf(buf) \
+ libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
+
+extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int, int);
+extern int libxfs_trace_writebuf(const char *, const char *, int, xfs_buf_t *, int);
+extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int);
+extern void libxfs_trace_putbuf (const char *, const char *, int, xfs_buf_t *);
+
+#else
+
+extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
+extern int libxfs_writebuf(xfs_buf_t *, int);
+extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
extern void libxfs_putbuf (xfs_buf_t *);
-extern void libxfs_purgebuf (xfs_buf_t *);
+
+#endif
+
+extern xfs_buf_t *libxfs_getsb(xfs_mount_t *, int);
+extern void libxfs_bcache_purge(void);
+extern void libxfs_bcache_flush(void);
+extern void libxfs_purgebuf(xfs_buf_t *);
+extern int libxfs_bcache_overflowed(void);
+extern int libxfs_bcache_usage(void);
/* Buffer (Raw) Interfaces */
-extern xfs_buf_t *libxfs_getbufr (dev_t, xfs_daddr_t, int);
-extern void libxfs_putbufr (xfs_buf_t *);
+extern xfs_buf_t *libxfs_getbufr(dev_t, xfs_daddr_t, int);
+extern void libxfs_putbufr(xfs_buf_t *);
+
+extern int libxfs_writebuf_int(xfs_buf_t *, int);
+extern int libxfs_readbufr(dev_t, xfs_daddr_t, xfs_buf_t *, int, int);
extern int libxfs_bhash_size;
extern int libxfs_ihash_size;
extern void cmn_err(int, char *, ...);
enum ce { CE_DEBUG, CE_CONT, CE_NOTE, CE_WARN, CE_ALERT, CE_PANIC };
-/* lio interface */
-/* lio_listio(3) interface (POSIX linked asynchronous I/O) */
-extern int libxfs_lio_ino_count;
-extern int libxfs_lio_dir_count;
-extern int libxfs_lio_aio_count;
-
-extern int libxfs_lio_init(void);
-extern void libxfs_lio_allocate(void);
-extern void *libxfs_get_lio_buffer(int type);
-extern void libxfs_put_lio_buffer(void *buffer);
-extern int libxfs_readbuf_list(dev_t dev, int nent, void *voidp, int type);
-
-typedef struct libxfs_lio_req {
- xfs_daddr_t blkno;
- int len; /* bbs */
-} libxfs_lio_req_t;
-
-#define LIBXFS_LIO_TYPE_INO 0x1
-#define LIBXFS_LIO_TYPE_DIR 0x2
-#define LIBXFS_LIO_TYPE_RAW 0x3
#define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT)
-extern int libxfs_nproc(void);
+extern int libxfs_nproc(void);
+extern unsigned long libxfs_physmem(void); /* in kilobytes */
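A note on units: XFS basic blocks are 512 bytes (BBSHIFT is 9), so the
conversion macro above works out as in this small example:

    xfs_off_t off = LIBXFS_BBTOOFF64(8);	/* 8 BBs << 9 = 4096 bytes */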
#include <xfs/xfs_ialloc.h>
#include <xfs/xfs_rtalloc.h>
#define _BOOLEAN_T_DEFINED 1
#endif
-#ifdef __USE_GNU
-typedef struct aiocb64 aiocb64_t;
-#define _AIOCB64_T_DEFINED 1
-#endif
-
#endif /* __XFS_LINUX_H__ */
LT_AGE = 0
HFILES = xfs.h init.h
-CFILES = bit.c cache.c init.c lio.c logitem.c rdwr.c trans.c util.c \
+CFILES = bit.c cache.c init.c logitem.c rdwr.c trans.c util.c \
xfs_alloc.c xfs_ialloc.c xfs_rtalloc.c \
xfs_inode.c xfs_btree.c xfs_alloc_btree.c xfs_ialloc_btree.c \
xfs_bmap_btree.c xfs_da_btree.c xfs_dir.c xfs_dir_leaf.c \
#define CACHE_DEBUG 1
#undef CACHE_ABORT
/* #define CACHE_ABORT 1 */
-#define HASH_CACHE_RATIO 8
static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
struct cache_node *
cache_node_allocate(
struct cache * cache,
- struct cache_hash * hashlist)
+ struct cache_hash * hashlist,
+ cache_key_t key)
{
unsigned int nodesfree;
struct cache_node * node;
pthread_mutex_unlock(&cache->c_mutex);
if (!nodesfree)
return NULL;
- if (!(node = cache->alloc())) { /* uh-oh */
+ if (!(node = cache->alloc(key))) { /* uh-oh */
pthread_mutex_lock(&cache->c_mutex);
cache->c_count--;
pthread_mutex_unlock(&cache->c_mutex);
return node;
}
+int
+cache_overflowed(
+ struct cache * cache)
+{
+ return (cache->c_maxcount == cache->c_max);
+}
+
/*
* Lookup in the cache hash table. With any luck we'll get a cache
* hit, in which case this will all be over quickly and painlessly.
break;
}
if (pos == head) {
- node = cache_node_allocate(cache, hash);
+ node = cache_node_allocate(cache, hash, key);
if (!node) {
priority = cache_shake(cache, hash, priority);
goto restart;
}
/*
- * Flush all nodes in the cache to disk.
+ * Flush all nodes in the cache to disk.
*/
void
cache_flush(
struct list_head * pos;
struct cache_node * node;
int i;
-
+
if (!cache->flush)
return;
-
+
for (i = 0; i < cache->c_hashsize; i++) {
hash = &cache->c_hash[i];
-
+
pthread_mutex_lock(&hash->ch_mutex);
head = &hash->ch_list;
for (pos = head->next; pos != head; pos = pos->next) {
total += i*hash_bucket_lengths[i];
if (hash_bucket_lengths[i] == 0)
continue;
- fprintf(fp, "Hash buckets with %2d entries %5ld (%3ld%%)\n",
+ fprintf(fp, "Hash buckets with %2d entries %5ld (%3ld%%)\n",
i, hash_bucket_lengths[i], (i*hash_bucket_lengths[i]*100)/cache->c_count);
}
if (hash_bucket_lengths[i]) /* last report bucket is the overflow bucket */
- fprintf(fp, "Hash buckets with >%2d entries %5ld (%3ld%%)\n",
+ fprintf(fp, "Hash buckets with >%2d entries %5ld (%3ld%%)\n",
i-1, hash_bucket_lengths[i], ((cache->c_count-total)*100)/cache->c_count);
}
#include <sys/mount.h>
#include <sys/ioctl.h>
#include <xfs/libxfs.h>
+#include <sys/sysctl.h>
int platform_has_uuid = 1;
extern char *progname;
*bsz = BBSIZE;
}
-/* ARGSUSED */
-int
-platform_aio_init(int aio_count)
-{
- return 0; /* aio/lio_listio not available */
-}
-
char *
platform_findrawpath(char *path)
{
int
platform_nproc(void)
{
- return 1;
+ int ncpu;
+ size_t len = sizeof(ncpu);
+ static int mib[2] = {CTL_HW, HW_NCPU};
+
+ if (sysctl(mib, 2, &ncpu, &len, NULL, 0) < 0)
+ ncpu = 1;
+
+ return ncpu;
}
+
+unsigned long
+platform_physmem(void)
+{
+ unsigned long physmem;
+ size_t len = sizeof(physmem);
+ static int mib[2] = {CTL_HW, HW_PHYSMEM};
+
+ if (sysctl(mib, 2, &physmem, &len, NULL, 0) < 0) {
+ fprintf(stderr, _("%s: can't determine memory size\n"),
+ progname);
+ exit(1);
+ }
+ return physmem >> 10;
+}
+
*bsz = (int)ssize;
}
-/* ARGSUSED */
-int
-platform_aio_init(int aio_count)
-{
- return 0; /* aio/lio_listio not available */
-}
-
char *
platform_findrawpath(char *path)
{
int
platform_nproc(void)
{
- return 1;
+ int ncpu;
+ size_t len = sizeof(ncpu);
+ static int mib[2] = {CTL_HW, HW_NCPU};
+
+ if (sysctl(mib, 2, &ncpu, &len, NULL, 0) < 0)
+ ncpu = 1;
+
+ return ncpu;
+}
+
+unsigned long
+platform_physmem(void)
+{
+ unsigned long physmem;
+ size_t len = sizeof(physmem);
+ static int mib[2] = {CTL_HW, HW_PHYSMEM};
+
+ if (sysctl(mib, 2, &physmem, &len, NULL, 0) < 0) {
+ fprintf(stderr, _("%s: can't determine memory size\n"),
+ progname);
+ exit(1);
+ }
+ return physmem >> 10;
}
{
return platform_nproc();
}
+
+unsigned long
+libxfs_physmem(void)
+{
+ return platform_physmem();
+}
\ No newline at end of file
extern char *platform_findblockpath (char *path);
extern int platform_direct_blockdev (void);
extern int platform_align_blockdev (void);
-extern int platform_aio_init (int aio_count);
extern int platform_nproc(void);
+extern unsigned long platform_physmem(void); /* in kilobytes */
extern int platform_has_uuid;
#endif /* LIBXFS_INIT_H */
*/
#include <xfs/libxfs.h>
-#include <aio.h>
#include <diskinfo.h>
#include <sys/sysmp.h>
*bsz = BBSIZE;
}
-int
-platform_aio_init(int aio_count)
-{
- struct aioinit aio_init;
-
- memset(&aio_init, 0, sizeof(aio_init));
- aio_init.aio_threads = aio_count;
- aio_init.aio_numusers = aio_count;
-
- aio_sgi_init64(&aio_init);
- return (1); /* aio/lio_listio available */
-}
-
char *
platform_findrawpath(char *path)
{
return sysmp(MP_NPROCS);
}
+unsigned long
+platform_physmem(void)
+{
+ struct rminfo ri;
+
+	if (sysmp(MP_SAGET, MPSA_RMINFO, &ri, sizeof(ri)) < 0) {
+ fprintf(stderr, _("%s: can't determine memory size\n"),
+ progname);
+ exit(1);
+ }
+ return (ri.physmem >> 10) * getpagesize(); /* kilobytes */
+}
\ No newline at end of file
#include <xfs/libxfs.h>
#include <mntent.h>
#include <sys/stat.h>
-#include <aio.h>
#undef ustat
#include <sys/ustat.h>
#include <sys/mount.h>
#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
int platform_has_uuid = 1;
extern char *progname;
max_block_alignment = *bsz;
}
-int
-platform_aio_init(int aio_count)
-{
- struct aioinit lcl_aio_init;
-
- memset(&lcl_aio_init, 0, sizeof(lcl_aio_init));
- lcl_aio_init.aio_threads = aio_count;
- lcl_aio_init.aio_numusers = aio_count;
-
- aio_init(&lcl_aio_init);
- return (1); /* aio/lio_listio available */
-}
-
char *
platform_findrawpath(char *path)
{
{
return sysconf(_SC_NPROCESSORS_ONLN);
}
+
+unsigned long
+platform_physmem(void)
+{
+ struct sysinfo si;
+
+ if (sysinfo(&si) < 0) {
+ fprintf(stderr, _("%s: can't determine memory size\n"),
+ progname);
+ exit(1);
+ }
+ return (si.totalram >> 10) * si.mem_unit; /* kilobytes */
+}
+++ /dev/null
-#include <xfs/libxfs.h>
-#include "init.h"
-#include "aio.h"
-
-#define DEF_PREFETCH_INOS 16
-#define DEF_PREFETCH_DIRS 16
-#define DEF_PREFETCH_AIO 32
-int libxfs_lio_ino_count = DEF_PREFETCH_INOS;
-int libxfs_lio_dir_count = DEF_PREFETCH_DIRS;
-int libxfs_lio_aio_count = DEF_PREFETCH_AIO;
-
-static pthread_key_t lio_ino_key;
-static pthread_key_t lio_dir_key;
-
-void
-libxfs_lio_allocate(void)
-{
-#ifdef _AIOCB64_T_DEFINED
- size_t size;
- void *voidp;
-
- /*
- * allocate a per-thread buffer which will be used in libxfs_readbuf_list
- * in the following order:
- * libxfs_lio_req_t array
- * aiocb64_t array
- * aiocb64_t * array
- * xfs_buf_t * array
- */
- size = sizeof(libxfs_lio_req_t) + sizeof(aiocb64_t) + sizeof(aiocb64_t *) + sizeof(xfs_buf_t *);
-
- voidp = malloc(libxfs_lio_ino_count*size);
- if (voidp == NULL) {
- fprintf(stderr, "lio_allocate: cannot allocate thread specific storage\n");
- exit(1);
- /* NO RETURN */
- return;
- }
- pthread_setspecific(lio_ino_key, voidp);
-
- voidp = malloc(libxfs_lio_dir_count*size);
- if (voidp == NULL) {
- fprintf(stderr, "lio_allocate: cannot allocate thread specific storage\n");
- exit(1);
- /* NO RETURN */
- return;
- }
- pthread_setspecific(lio_dir_key, voidp);
-#endif /* _AIOCB64_T_DEFINED */
-}
-
-int
-libxfs_lio_init(void)
-{
-#ifdef _AIOCB64_T_DEFINED
- if (platform_aio_init(libxfs_lio_aio_count)) {
- pthread_key_create(&lio_ino_key, NULL);
- pthread_key_create(&lio_dir_key, NULL);
- return (1);
- }
-#endif /* _AIOCB64_T_DEFINED */
- return (0);
-}
-
-void *
-libxfs_get_lio_buffer(int type)
-{
-#ifdef _AIOCB64_T_DEFINED
- if (type == LIBXFS_LIO_TYPE_INO)
- return pthread_getspecific(lio_ino_key);
- if (type == LIBXFS_LIO_TYPE_DIR)
- return pthread_getspecific(lio_dir_key);
- if (type == LIBXFS_LIO_TYPE_RAW) {
- /* use the inode buffers since there is
- * no overlap with the other requests.
- */
- return pthread_getspecific(lio_ino_key);
- }
- fprintf(stderr, "get_lio_buffer: invalid type 0x%x\n", type);
- exit(1);
-#endif
- return NULL;
-}
-
-/* ARGSUSED */
-void
-libxfs_put_lio_buffer(void *buffer)
-{
- return; /* nothing to do */
-}
-
-static int
-lio_compare(const void *e1, const void *e2)
-{
- libxfs_lio_req_t *r1 = (libxfs_lio_req_t *) e1;
- libxfs_lio_req_t *r2 = (libxfs_lio_req_t *) e2;
-
- return (int) (r1->blkno - r2->blkno);
-}
-
-int
-libxfs_readbuf_list(dev_t dev, int nent, void *voidp, int type)
-{
-#ifdef _AIOCB64_T_DEFINED
- libxfs_lio_req_t *rblp;
- xfs_buf_t *bp, **bplist;
- aiocb64_t *aioclist, **aiocptr;
- int i, nbp, err;
- int fd;
-
- if (nent <= 0)
- return 0;
- if ((type == LIBXFS_LIO_TYPE_INO) || (type == LIBXFS_LIO_TYPE_RAW)) {
- if (libxfs_lio_ino_count == 0)
- return (0);
- if (nent > libxfs_lio_ino_count)
- nent = libxfs_lio_ino_count;
- }
- else if (type == LIBXFS_LIO_TYPE_DIR) {
- if (libxfs_lio_dir_count == 0)
- return (0);
- if (nent > libxfs_lio_dir_count)
- nent = libxfs_lio_dir_count;
- if (nent > 2)
- qsort(voidp, nent, sizeof(libxfs_lio_req_t), lio_compare);
- }
- else {
- fprintf(stderr, "Invalid type 0x%x in libxfs_readbuf_list\n", type);
- abort();
- /* NO RETURN */
- return (0);
- }
-
- /* space for lio_listio processing, see libxfs_lio_allocate */
- rblp = (libxfs_lio_req_t *) voidp;
- aioclist = (aiocb64_t *) (rblp + nent);
- aiocptr = (aiocb64_t **) (aioclist + nent);
- bplist = (xfs_buf_t **) (aiocptr + nent);
-
- bzero(aioclist, nent*sizeof(aiocb64_t));
-
- /* look in buffer cache */
- for (i = 0, nbp = 0; i < nent; i++) {
- ASSERT(rblp[i].len);
- bp = libxfs_getbuf(dev, rblp[i].blkno, rblp[i].len);
- if (bp == NULL)
- continue;
- if (bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)) {
- /* already in cache */
- libxfs_putbuf(bp);
- continue;
- }
- bplist[nbp++] = bp;
- }
-
- if (nbp == 0)
- return (0); /* Nothing to do */
-
- if (nbp == 1) {
- libxfs_putbuf(bplist[0]); /* single buffer, no point */
- return (0);
- }
-
- fd = libxfs_device_to_fd(dev);
-
- for (i = 0; i < nbp; i++) {
- aioclist[i].aio_fildes = fd;
- aioclist[i].aio_nbytes = XFS_BUF_COUNT(bplist[i]);
- aioclist[i].aio_buf = XFS_BUF_PTR(bplist[i]);
- aioclist[i].aio_offset = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i]));
- aioclist[i].aio_lio_opcode = LIO_READ;
- aiocptr[i] = &aioclist[i];
- }
-
- err = lio_listio64(LIO_WAIT, aiocptr, nbp, NULL);
-
- if (err != 0) {
- fprintf(stderr, "lio_listio (%d entries) failure err = %d\n", nbp, err);
- }
-
- for (i = 0; i < nbp; i++) {
- /* buffer with data in cache available via future libxfs_readbuf */
- if (err == 0)
- bplist[i]->b_flags |= LIBXFS_B_UPTODATE;
- libxfs_putbuf(bplist[i]);
- }
-
- return (err == 0? nbp : -1);
-#else /* _AIOCB64_T_DEFINED */
- return -1;
-#endif /* _AIOCB64_T_DEFINED */
-}
#define BDSTRAT_SIZE (256 * 1024)
#define min(x, y) ((x) < (y) ? (x) : (y))
+#define IO_BCOMPARE_CHECK
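+/* IO_BCOMPARE_CHECK enables the sanity check in libxfs_bcompare()
+ * below, which warns when a lookup matches on device and block
+ * number but disagrees on buffer length */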
+
void
libxfs_device_zero(dev_t dev, xfs_daddr_t start, uint len)
{
return BBTOB(len);
}
+/*
+ * Simple I/O (buffer cache) interface
+ */
+
+
+#ifdef XFS_BUF_TRACING
+
+#undef libxfs_readbuf
+#undef libxfs_writebuf
+#undef libxfs_getbuf
+#undef libxfs_putbuf
+
+xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
+int libxfs_writebuf(xfs_buf_t *, int);
+xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+void libxfs_putbuf (xfs_buf_t *);
+
+xfs_buf_t *
+libxfs_trace_readbuf(const char *func, const char *file, int line, dev_t dev, xfs_daddr_t blkno, int len, int flags)
+{
+ xfs_buf_t *bp = libxfs_readbuf(dev, blkno, len, flags);
+
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ return bp;
+}
+
+int
+libxfs_trace_writebuf(const char *func, const char *file, int line, xfs_buf_t *bp, int flags)
+{
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ return libxfs_writebuf(bp, flags);
+}
+
+xfs_buf_t *
+libxfs_trace_getbuf(const char *func, const char *file, int line, dev_t device, xfs_daddr_t blkno, int len)
+{
+ xfs_buf_t *bp = libxfs_getbuf(device, blkno, len);
+
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ return bp;
+}
+
+void
+libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
+{
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ libxfs_putbuf(bp);
+}
+
+
+#endif
+
+
xfs_buf_t *
libxfs_getsb(xfs_mount_t *mp, int flags)
{
XFS_FSS_TO_BB(mp, 1), flags);
}
-
-/*
- * Simple I/O (buffer cache) interface
- */
-
xfs_zone_t *xfs_buf_zone;
typedef struct {
dev_t device;
xfs_daddr_t blkno;
- unsigned int count;
+ unsigned int bblen;
} xfs_bufkey_t;
static unsigned int
libxfs_bhash(cache_key_t key, unsigned int hashsize)
{
- return ((unsigned int)((xfs_bufkey_t *)key)->blkno) % hashsize;
+ return (((unsigned int)((xfs_bufkey_t *)key)->blkno) >> 5) % hashsize;
}
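The added shift is presumably there because buffer addresses tend to be
aligned to multi-block cluster boundaries, leaving the low bits of b_blkno
with little entropy. A hypothetical illustration with hashsize = 1024 and
buffers laid out on 16-BB (8 KiB) boundaries:

    /* blkno % 1024        -> only every 16th bucket ever used
     * (blkno >> 5) % 1024 -> all buckets used, ~2 neighbours each */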
static int
#ifdef IO_BCOMPARE_CHECK
if (bp->b_dev == bkey->device &&
bp->b_blkno == bkey->blkno &&
- bp->b_bcount != bkey->count)
- fprintf(stderr, "Badness in key lookup (length)\n"
- "bp=(bno %llu, len %u bb) key=(bno %llu, len %u bbs)\n",
+ bp->b_bcount != BBTOB(bkey->bblen))
+ fprintf(stderr, "%lx: Badness in key lookup (length)\n"
+ "bp=(bno %llu, len %u bytes) key=(bno %llu, len %u bytes)\n",
+ pthread_self(),
(unsigned long long)bp->b_blkno, (int)bp->b_bcount,
- (unsigned long long)bkey->blkno, (int)bkey->count);
+ (unsigned long long)bkey->blkno, BBTOB(bkey->bblen));
#endif
return (bp->b_dev == bkey->device &&
bp->b_blkno == bkey->blkno &&
- bp->b_bcount == bkey->count);
+ bp->b_bcount == BBTOB(bkey->bblen));
}
void
bp->b_flags, bp->b_node.cn_count);
}
-static void
-libxfs_brelse(struct cache_node *node)
-{
- xfs_buf_t *bp = (xfs_buf_t *)node;
- xfs_buf_log_item_t *bip;
- extern xfs_zone_t *xfs_buf_item_zone;
-
- if (bp != NULL) {
- if (bp->b_flags & LIBXFS_B_DIRTY)
- libxfs_writebufr(bp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
- if (bip)
- libxfs_zone_free(xfs_buf_item_zone, bip);
- free(bp->b_addr);
- bp->b_addr = NULL;
- bp->b_flags = 0;
- free(bp);
- bp = NULL;
- }
-}
-
static void
libxfs_initbuf(xfs_buf_t *bp, dev_t device, xfs_daddr_t bno, unsigned int bytes)
{
strerror(errno));
exit(1);
}
+#ifdef XFS_BUF_TRACING
+ list_head_init(&bp->b_lock_list);
+#endif
+ pthread_mutex_init(&bp->b_lock, NULL);
}
xfs_buf_t *
xfs_buf_t *bp;
bp = libxfs_zone_zalloc(xfs_buf_zone);
- libxfs_initbuf(bp, device, blkno, BBTOB(len));
+ if (bp != NULL)
+ libxfs_initbuf(bp, device, blkno, BBTOB(len));
+#ifdef IO_DEBUG
+ printf("%lx: %s: allocated %u bytes buffer, key=%llu(%llu), %p\n",
+ pthread_self(), __FUNCTION__, BBTOB(len),
+ (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
+#endif
return bp;
}
-void
-libxfs_putbufr(xfs_buf_t *bp)
-{
- libxfs_brelse((struct cache_node *)bp);
-}
+
+#ifdef XFS_BUF_TRACING
+struct list_head lock_buf_list = {&lock_buf_list, &lock_buf_list};
+int lock_buf_count = 0;
+#endif
xfs_buf_t *
libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
{
xfs_buf_t *bp;
xfs_bufkey_t key;
- unsigned int bytes = BBTOB(len);
+ int miss;
key.device = device;
key.blkno = blkno;
- key.count = bytes;
-
- if (cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp)) {
+ key.bblen = len;
+
+ miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
+ if (bp) {
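+		/*
+		 * Buffers are handed out locked; the matching
+		 * libxfs_putbuf() drops b_lock again, serializing
+		 * repair threads on a per-buffer basis.
+		 */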
+ pthread_mutex_lock(&bp->b_lock);
+#ifdef XFS_BUF_TRACING
+ pthread_mutex_lock(&libxfs_bcache->c_mutex);
+ lock_buf_count++;
+ list_add(&bp->b_lock_list, &lock_buf_list);
+ pthread_mutex_unlock(&libxfs_bcache->c_mutex);
+#endif
#ifdef IO_DEBUG
- fprintf(stderr, "%s: allocated %ubytes buffer, key=%llu(%llu), %p\n",
- __FUNCTION__, BBTOB(len),
- (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
+ printf("%lx %s: %s buffer %p for bno = %llu\n",
+ pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
+ bp, (long long)LIBXFS_BBTOOFF64(blkno));
#endif
- libxfs_initbuf(bp, device, blkno, bytes);
}
+
return bp;
}
void
libxfs_putbuf(xfs_buf_t *bp)
{
+#ifdef XFS_BUF_TRACING
+ pthread_mutex_lock(&libxfs_bcache->c_mutex);
+ lock_buf_count--;
+ ASSERT(lock_buf_count >= 0);
+ list_del_init(&bp->b_lock_list);
+ pthread_mutex_unlock(&libxfs_bcache->c_mutex);
+#endif
+ pthread_mutex_unlock(&bp->b_lock);
cache_node_put((struct cache_node *)bp);
}
key.device = bp->b_dev;
key.blkno = bp->b_blkno;
- key.count = bp->b_bcount;
+ key.bblen = bp->b_bcount >> BBSHIFT;
cache_node_purge(libxfs_bcache, &key, (struct cache_node *)bp);
}
static struct cache_node *
-libxfs_balloc(void)
+libxfs_balloc(cache_key_t key)
{
- return libxfs_zone_zalloc(xfs_buf_zone);
+ xfs_bufkey_t *bufkey = (xfs_bufkey_t *)key;
+
+ return (struct cache_node *)libxfs_getbufr(bufkey->device,
+ bufkey->blkno, bufkey->bblen);
}
int
return errno;
}
#ifdef IO_DEBUG
- fprintf(stderr, "readbufr read %ubytes, blkno=%llu(%llu), %p\n",
- bytes, (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
+ printf("%lx: %s: read %u bytes, blkno=%llu(%llu), %p\n",
+ pthread_self(), __FUNCTION__, bytes,
+ (long long)LIBXFS_BBTOOFF64(blkno), (long long)blkno, bp);
#endif
if (bp->b_dev == dev &&
bp->b_blkno == blkno &&
int error;
bp = libxfs_getbuf(dev, blkno, len);
- if (!(bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
+ if (bp && !(bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) {
error = libxfs_readbufr(dev, blkno, bp, len, flags);
if (error) {
libxfs_putbuf(bp);
return EIO;
}
#ifdef IO_DEBUG
- fprintf(stderr, "writebufr wrote %ubytes, blkno=%llu(%llu), %p\n",
- bp->b_bcount, (long long)LIBXFS_BBTOOFF64(bp->b_blkno),
- (long long)bp->b_blkno, bp);
+ printf("%lx: %s: wrote %u bytes, blkno=%llu(%llu), %p\n",
+ pthread_self(), __FUNCTION__, bp->b_bcount,
+ (long long)LIBXFS_BBTOOFF64(bp->b_blkno),
+ (long long)bp->b_blkno, bp);
#endif
bp->b_flags |= LIBXFS_B_UPTODATE;
bp->b_flags &= ~(LIBXFS_B_DIRTY | LIBXFS_B_EXIT);
{
#ifdef IO_DEBUG
if (boff + len > bp->b_bcount) {
- fprintf(stderr, "Badness, iomove out of range!\n"
+ printf("Badness, iomove out of range!\n"
"bp=(bno %llu, bytes %u) range=(boff %u, bytes %u)\n",
(long long)bp->b_blkno, bp->b_bcount, boff, len);
abort();
libxfs_writebufr(bp);
}
+static void
+libxfs_brelse(struct cache_node *node)
+{
+ xfs_buf_t *bp = (xfs_buf_t *)node;
+ xfs_buf_log_item_t *bip;
+ extern xfs_zone_t *xfs_buf_item_zone;
+
+ if (bp != NULL) {
+ if (bp->b_flags & LIBXFS_B_DIRTY)
+ libxfs_writebufr(bp);
+ bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ if (bip)
+ libxfs_zone_free(xfs_buf_item_zone, bip);
+ free(bp->b_addr);
+ pthread_mutex_destroy(&bp->b_lock);
+ bp->b_addr = NULL;
+ bp->b_flags = 0;
+ free(bp);
+ bp = NULL;
+ }
+}
+
+void
+libxfs_putbufr(xfs_buf_t *bp)
+{
+ libxfs_brelse((struct cache_node *)bp);
+}
+
+
void
libxfs_bcache_purge(void)
{
cache_flush(libxfs_bcache);
}
+int
+libxfs_bcache_overflowed(void)
+{
+ return cache_overflowed(libxfs_bcache);
+}
+
struct cache_operations libxfs_bcache_operations = {
/* .hash */ libxfs_bhash,
/* .alloc */ libxfs_balloc,
}
static struct cache_node *
-libxfs_ialloc(void)
+libxfs_ialloc(cache_key_t key)
{
return libxfs_zone_zalloc(xfs_inode_zone);
}
XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* remove xact ptr */
hold = (bip->bli_flags & XFS_BLI_HOLD);
- if (bip->bli_flags & (XFS_BLI_DIRTY|XFS_BLI_STALE)) {
+ if (bip->bli_flags & XFS_BLI_DIRTY) {
#ifdef XACT_DEBUG
fprintf(stderr, "flushing/staling buffer %p (hold=%d)\n",
bp, hold);
#endif
- if (bip->bli_flags & XFS_BLI_DIRTY)
- libxfs_writebuf_int(bp, 0);
- if (hold)
- bip->bli_flags &= ~XFS_BLI_HOLD;
- else
- libxfs_putbuf(bp);
+ libxfs_writebuf_int(bp, 0);
}
+ if (hold)
+ bip->bli_flags &= ~XFS_BLI_HOLD;
+ else
+ libxfs_putbuf(bp);
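+	/*
+	 * The hold flag and the buffer reference are now dropped even
+	 * when the buffer item was clean, not only on the dirty path.
+	 */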
/* release the buf item */
kmem_zone_free(xfs_buf_item_zone, bip);
}
.SH NAME
xfs_repair \- repair an XFS filesystem
.SH SYNOPSIS
-.nf
-\f3xfs_repair\f1 [ \f3\-dLMnPvV\f1 ] [ \f3\-o\f1 subopt[=value] ]
- [\f3-t\f1 interval] [\f3-l\f1 logdev] [\f3-r\f1 rtdev] xfs_special
-.sp .8v
-\f3xfs_repair\f1 \f3\-f\f1 [ \f3\-dLMnPvV\f1 ] [ \f3\-o\f1 subopt[=value] ]
- [\f3-t\f1 interval] [\f3-l\f1 logdev] [\f3-r\f1 rtdev] ... file
-.fi
+.B xfs_repair
+[
+.B \-dfLnPv
+] [
+.B \-o
+.I subopt\c
+[\c
+.B =\c
+.IR value ]
+] [
+.B \-t
+.I interval
+] [
+.B \-l
+.I logdev
+] [
+.B \-r
+.I rtdev
+]
+.I device
+.br
+.B xfs_repair \-V
.SH DESCRIPTION
-.I xfs_repair
-is a parallelized version of
-.IR xfs_repair (1m)
-that repairs corrupt or damaged XFS filesystems
+.B xfs_repair
+repairs corrupt or damaged XFS filesystems
(see
-.IR xfs (5)).
+.BR xfs (5)).
The filesystem is specified using the
-.I xfs_special
-argument which should be the device name of the
-disk partition or volume containing
-the filesystem.
-If given the name of a block device,
-.I xfs_repair
+.I device
+argument which should be the device name of the disk partition or
+volume containing the filesystem. If given the name of a block device,
+.B xfs_repair
will attempt to find the raw device associated
-with the specified block device and will use the raw device
-instead.
+with the specified block device and will use the raw device instead.
.PP
Regardless, the filesystem to be repaired
must be unmounted;
otherwise, the resulting filesystem may be inconsistent or corrupt.
-.PP
-The options to \f2xfs_repair\f1 are:
+.SH OPTIONS
.TP
.B \-f
-Specifies that the special device is actually a file (see the
-\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option).
-This might happen if an image copy
+Specifies that the filesystem image to be processed is stored in a
+regular file at
+.I device
+(see the
+.B mkfs.xfs \-d
+.I file
+option). This might happen if an image copy
of a filesystem has been copied or written into an ordinary file.
This option implies that any external log or realtime section
is also in an ordinary file.
.B \-L
Force Log Zeroing.
Forces
-.I xfs_repair
+.B xfs_repair
to zero the log even if it is dirty (contains metadata changes).
When using this option the filesystem will likely appear to be corrupt,
and its use can cause the loss of user files and/or data.
.TP
-\f3-l\f1 \f2logdev\f1
+.BI \-l " logdev"
Specifies the device special file where the filesystem's external
-log resides.
-Only for those filesystems which use an external log.
+log resides. Only for those filesystems which use an external log.
See the
-\f2mkfs.xfs\f1 \f3\-l\f1 option, and refer to
-.IR xfs (5)
+.B mkfs.xfs \-l
+option, and refer to
+.BR xfs (5)
for a detailed description of the XFS log.
.TP
-\f3-r\f1 \f2rtdev\f1
+.BI \-r " rtdev"
Specifies the device special file where the filesystem's realtime
-section resides.
-Only for those filesystems which use a realtime section.
+section resides. Only for those filesystems which use a realtime section.
See the
-\f2mkfs.xfs\f1 \f3\-r\f1 option, and refer to
-.IR xfs (5)
+.B mkfs.xfs \-r
+option, and refer to
+.BR xfs (5)
for a detailed description of the XFS realtime section.
.TP
.B \-n
-No modify mode.
-Specifies that
-.I xfs_repair
+No modify mode. Specifies that
+.B xfs_repair
should not modify the filesystem but should only scan the
filesystem and indicate what repairs would have been made.
-.TP
-\f3-o\f1 \f2subopt[=value]\f1
+.HP
+.B \-o
+.I subopt\c
+[\c
+.B =\c
+.IR value ]
+.br
Override what the program might conclude about the filesystem
if left to its own devices.
.IP
The
-.B assume_xfs
-suboption
-specifies that the filesystem is an XFS filesystem.
-Normally, if
-.I xfs_repair
-cannot find an XFS superblock, it checks to see if the
-filesystem is an EFS filesystem before it tries to
-regenerate the XFS superblock.
-If the
-.B assume_xfs
-option is in effect,
-.I xfs_repair
-will assume that the filesystem is an XFS filesystem and
-will ignore an EFS superblock if one is found.
-.IP
-The
+.IR subopt ions
+supported are:
+.RS 1.0i
+.TP
.BI ihash= ihashsize
-suboption modifies the default xfs_repair inode cache hash size.
-The total number of inode cache entries are limited to 8 times this
-amount.
-.IP
-The
+overrides the default inode cache hash size. The total number of
+inode cache entries is limited to 8 times this amount. The default
+.I ihashsize
+is 1024 (for a total of 8192 entries).
+.TP
.BI bhash= bhashsize
-suboption modifies the default xfs_repair buffer cache hash size.
-The total number of buffer cache entries are limited to 8 times this
-amount.
-.IP
-The
-.BI pfino= inode_blocks
-suboption modifies the default size of read ahead xfs_repair inode
-blocks.
-.IP
-The
-.BI pfdir= dir_blocks
-suboption modifies the default size of read ahead xfs_repair dir
-blocks.
-.IP
-The
-.BI thread= thread_count
-suboption modifies the number of xfs_repair parallel threads.
+overrides the default buffer cache hash size. The total number of
+buffer cache entries is limited to 8 times this amount. The default
+size is set to use up the remainder of 75% of the system's physical
+RAM.
.TP
-\f3-t\f1 \f2interval\f1
-Modify reporting interval. During long runs xfs_repair outputs
-its progress every 15 minutes. Reporting is only activated when
-xfs_repair is multi-threaded.
+.BI ag_stride= ags_per_concat_unit
+This creates additional processing threads to process, in parallel,
+AGs that span multiple concat units. This can significantly
+reduce repair times on concat-based filesystems.
+.RE
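.IP
For example, both cache hash sizes could be doubled from their defaults with
an invocation such as the following (hypothetical device name):
.IP
.B xfs_repair \-o ihash=2048 \-o bhash=2048 /dev/sda7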
.TP
-.B \-M
-Disable multi-threaded mode. Normally, xfs_repair runs with twice
-the number of threads as processors.
+.BI \-t " interval"
+Modify the reporting interval. During long runs
+.B xfs_repair
+outputs its progress every 15 minutes. Reporting is only activated when
+ag_stride is enabled.
.TP
.B \-P
-Disable read ahead of inode and directory blocks. If applicable,
-a read ahead of up to 16 additional blocks is done.
+Disable prefetching of inode and directory blocks.
.TP
.B \-v
Verbose output.
.TP
.B \-d
-Repair dangerously. Allow xfs_repair to repair an XFS filesystem
-mounted read only. This is typically done on a root fileystem from
-single user mode, immediately followed by a reboot.
+Repair dangerously. Allow
+.B xfs_repair
+to repair an XFS filesystem mounted read only. This is typically done
+on a root filesystem from single user mode, immediately followed by a reboot.
+.TP
+.B \-V
+Prints out the current version number and exits.
.SS Checks Performed
Inconsistencies corrected include the following:
-.TP
-1.
+.IP 1.
Inode and inode blockmap (addressing) checks:
bad magic number in inode,
bad magic numbers in inode blockmap blocks,
incorrect number of records in inode blockmap blocks,
blocks claimed that are not in a legal data area of the filesystem,
blocks that are claimed by more than one inode.
-.TP
-2.
+.IP 2.
Inode allocation map checks:
bad magic number in inode map blocks,
inode state as indicated by map (free or in-use) inconsistent
the inode allocation map,
inode allocation map referencing blocks that do not appear
to contain inodes.
-.TP
-3.
+.IP 3.
Size checks:
number of blocks claimed by inode inconsistent with inode size,
directory size not block aligned,
inode size not consistent with inode format.
-.TP
-4.
+.IP 4.
Directory checks:
bad magic numbers in directory blocks,
incorrect number of entries in a directory block,
entries out of hashvalue order,
incorrect internal directory pointers,
directory type not consistent with inode format and size.
-.TP
-5.
+.IP 5.
Pathname checks:
files or directories not referenced by a pathname starting from
the filesystem root,
illegal pathname components.
-.TP
-6.
+.IP 6.
Link count checks:
link counts that do not agree with the number of
directory references to the inode.
-.TP
-7.
+.IP 7.
Freemap checks:
blocks claimed free by the freemap but also claimed by an inode,
blocks unclaimed by any inode but not appearing in the freemap.
-.TP
-8.
+.IP 8.
Super Block checks:
total free block and/or free i-node count incorrect,
filesystem geometry inconsistent,
directory.
The name assigned is the inode number.
.SS Disk Errors
-.I xfs_repair
-aborts on most disk I/O errors.
-Therefore, if you are trying
+.B xfs_repair
+aborts on most disk I/O errors. Therefore, if you are trying
to repair a filesystem that was damaged due to a disk drive failure,
-steps should be taken to ensure that
-all blocks in the filesystem are readable and writeable
-before attempting to use
-.I xfs_repair
-to repair the filesystem.
-A possible method is using
-.IR dd (8)
+steps should be taken to ensure that all blocks in the filesystem are
+readable and writeable before attempting to use
+.B xfs_repair
+to repair the filesystem. A possible method is using
+.BR dd (8)
to copy the data onto a good disk.
.SS lost+found
The directory
.I lost+found
does not have to already exist in the filesystem being repaired.
-If the directory does not exist, it is automatically created.
-If the \f2lost+found\f1 directory already exists,
-the \f2lost+found\f1
-directory is deleted and recreated every time \f2xfs_repair\f1
-runs.
-This ensures that there are no name conflicts in \f2lost+found\f1.
-However, if you rename a file in \f2lost+found\f1 and leave it there,
-if \f2xfs_repair\f1 is run again, that file is renamed back to
-its inode number.
+If the directory does not exist, it is automatically created if required.
+If it already exists, it will be checked for consistency and if valid
+will be used for additional orphaned files. Invalid
+.I lost+found
+directories are removed and recreated. Existing files in a valid
+.I lost+found
+are not removed or renamed.
.SS Corrupted Superblocks
XFS has both primary and secondary superblocks.
-\f2xfs_repair\f1 uses information in the primary superblock
+.B xfs_repair
+uses information in the primary superblock
to automatically find and validate the primary superblock
against the secondary superblocks before proceeding.
Should the primary be too corrupted to be useful in locating
until it finds and validates some secondary superblocks.
At that point, it generates a primary superblock.
.SS Quotas
-If quotas are in use, it is possible that \f2xfs_repair\f1 will clear
-some or all of the filesystem quota information.
+If quotas are in use, it is possible that
+.B xfs_repair
+will clear some or all of the filesystem quota information.
If so, the program issues a warning just before it terminates.
If all quota information is lost, quotas are disabled and the
program issues a warning to that effect.
.PP
-Note that \f2xfs_repair\f1 does not check the validity of quota limits.
-It is recommended that you check the quota limit information manually
-after \f2xfs_repair\f1.
+Note that
+.B xfs_repair
+does not check the validity of quota limits. It is recommended
+that you check the quota limit information manually after
+.BR xfs_repair .
Also, space usage information is automatically regenerated the
next time the filesystem is mounted with quotas turned on, so the
next quota mount of the filesystem may take some time.
.SH DIAGNOSTICS
-.I xfs_repair
+.B xfs_repair
issues informative messages as it proceeds
indicating what it has found that is abnormal or any corrective
action that it has taken.
Most of the messages are completely understandable only to those
who are knowledgeable about the structure of the filesystem.
Some of the more common messages are explained here.
-Note that the language of the messages is slightly different
-if \f2xfs_repair\f1 is run in no-modify mode because the program is not
-changing anything on disk.
+Note that the language of the messages is slightly different if
+.B xfs_repair
+is run in no-modify mode because the program is not changing anything on disk.
No-modify mode indicates what it would do to repair the filesystem
if run without the no-modify flag.
.PP
-disconnected inode \f3xxxx\f1, moving to \f2lost+found\f1
+.B disconnected inode
+.IB ino ,
+.B moving to lost+found
.IP
An inode numbered
-.B xxxx
+.I ino
was not connected to the filesystem
-directory tree and was reconnected to the \f2lost+found\f1 directory.
-The inode is assigned the name of its inode number (i-number).
-If a \f2lost+found\f1 directory does not exist, it is automatically
-created.
+directory tree and was reconnected to the
+.I lost+found
+directory. The inode is assigned the name of its inode number
+.RI ( ino ).
+If a
+.I lost+found
+directory does not exist, it is automatically created.
.PP
-disconnected dir inode \f3xxxx\f1, moving to \f2lost+found\f1
+.B disconnected dir inode
+.IB ino ,
+.B moving to lost+found
.IP
As above only the inode is a directory inode.
-If a directory inode is attached to \f2lost+found\f1, all of its
-children (if any) stay attached to the directory and therefore
+If a directory inode is attached to
+.IR lost+found ,
+all of its children (if any) stay attached to the directory and therefore
get automatically reconnected when the directory is reconnected.
.PP
-imap claims in-use inode \f3xxxx\f1 is free, correcting imap
+.B imap claims in-use inode
+.I ino
+.B is free, correcting imap
.IP
-The inode allocation map thinks that inode \f3xxxx\f1 is
-free whereas examination of the inode indicates that the
+The inode allocation map thinks that inode
+.I ino
+is free whereas examination of the inode indicates that the
inode may be in use (although it may be disconnected).
The program updates the inode allocation map.
.PP
-imap claims free inode \f3xxxx\f1 is in use, correcting imap
+.B imap claims free inode
+.I ino
+.B is in use, correcting imap
.IP
-The inode allocation map thinks that inode \f3xxxx\f1 is
-in use whereas examination of the inode indicates that the
+The inode allocation map thinks that inode
+.I ino
+is in use whereas examination of the inode indicates that the
inode is not in use and therefore is free.
The program updates the inode allocation map.
.PP
-resetting inode \f3xxxx\f1 nlinks from \f3x\f1 to \f3y\f1
+.B resetting inode
+.I ino
+.B nlinks from
+.I x
+.B to
+.I y
.IP
The program detected a mismatch between the
-number of valid directory entries referencing inode \f3xxxx\f1
+number of valid directory entries referencing inode
+.I ino
and the number of references recorded in the inode and corrected the
number in the inode.
.PP
-\f3fork-type\f1 fork in ino \f3xxxx\f1 claims used block \f3yyyy\f1
+.I fork-type
+.B fork in ino
+.I ino
+.B claims used block
+.I bno
.IP
-Inode \f3xxxx\f1 claims a block \f3yyyy\f1 that is used (claimed)
-by either another inode or the filesystem itself for metadata storage.
-The \f3fork-type\f1 is either \f3data\f1 or \f3attr\f1
+Inode
+.I ino
+claims a block
+.I bno
+that is used (claimed) by either another inode or the filesystem
+itself for metadata storage. The
+.I fork-type
+is either
+.B data
+or
+.B attr
indicating whether the problem lies in the portion of the
inode that tracks regular data or the portion of the inode
that stores XFS attributes.
Any inode that claims blocks used by the filesystem is deleted.
If two or more inodes claim the same block, they are both deleted.
.PP
-\f3fork-type\f1 fork in ino \f3xxxx\f1 claims dup extent ...
+.I fork-type
+.B fork in ino
+.I ino
+.B claims dup extent ...
.IP
-Inode \f3xxxx\f1 claims a block in an extent known to be
-claimed more than once.
+Inode
+.I ino
+claims a block in an extent known to be claimed more than once.
The offset in the inode, start and length of the extent is given.
The message is slightly different
if the inode is a real-time (rt) inode and the extent is therefore
a real-time (rt) extent.
.PP
-inode \f3xxxx\f1 - bad extent ...
+.B inode
+.I ino
+.B \- bad extent ...
.IP
-An extent record in the blockmap of inode \f3xxxx\f1 claims
-blocks that are out of the legal range of the filesystem.
-The message supplies the start, end, and file offset of
-the extent.
-The message is slightly different
-if the extent is a real-time (rt) extent.
+An extent record in the blockmap of inode
+.I ino
+claims blocks that are out of the legal range of the filesystem.
+The message supplies the start, end, and file offset of the extent.
+The message is slightly different if the extent is a real-time (rt) extent.
.PP
-bad \f3fork-type\f1 fork in inode \f3xxxx\f1
+.B bad
+.I fork-type
+.B fork in inode
+.I ino
.IP
There was something structurally wrong or inconsistent with the
data structures that map offsets to filesystem blocks.
.PP
-cleared inode \f3xxxx\f1
+.B cleared inode
+.I ino
.IP
There was something wrong with the inode that
was uncorrectable so the program freed the inode.
This usually happens because the inode claims
blocks that are used by something else or the inode itself
-is badly corrupted.
-Typically, this message
+is badly corrupted. Typically, this message
is preceded by one or more messages indicating why the
inode needed to be cleared.
.PP
-bad attribute fork in inode \f3xxxx\f1, clearing attr fork
+.B bad attribute fork in inode
+.IR ino ,
+.B clearing attr fork
.IP
There was something wrong with the portion of the inode that
stores XFS attributes (the attribute fork) so the program reset
the attribute fork.
As a result of this, all attributes on that inode are lost.
.PP
-correcting nextents for inode \f3xxxx\f1, was \f3x\f1 - counted \f3y\f1
+.B correcting nextents for inode
+.IR ino ,
+.B was
+.I x
+.B \- counted
+.I y
.IP
The program found that the number of extents used to store
the data in the inode is wrong and corrected the number.
The message refers to anextents if the count is wrong
on the number of extents used to store attribute information.
.PP
-entry \f3"name"\f1 in dir \f3xxxx\f1 not consistent
-with ..
-value (\f3yyyy\f1) in dir ino \f3xxxx\f1,
-junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.B entry
+.I name
+.B in dir
+.I dir_ino
+.B not consistent with .. value
+.BI ( xxxx )
+.B in dir ino
+.IB ino ,
+.B junking entry
+.I name
+.B in directory inode
+.I dir_ino
.IP
-The entry \f3"name"\f1 in directory inode \f3xxxx\f1 references a
-directory inode \f3yyyy\f1.
-However, the ..\& entry in directory \f3yyyy\f1 does not point
-back to directory \f3xxxx\f1,
-so the program deletes the entry \f3"name"\f1 in directory inode
-\f3xxxx\f1.
-If the directory inode \f3yyyy\f1 winds up becoming a disconnected
-inode as a result of this, it is moved to \f2lost+found\f1 later.
+The entry
+.I name
+in directory inode
+.I dir_ino
+references a directory inode
+.IR ino .
+However, the ..\& entry in directory
+.I ino
+does not point back to directory
+.IR dir_ino ,
+so the program deletes the entry
+.I name
+in directory inode
+.IR dir_ino .
+If the directory inode
+.I ino
+winds up becoming a disconnected inode as a result of this, it is moved to
+.I lost+found
+later.
.PP
-entry \f3"name"\f1 in dir \f3xxxx\f1 references already
-connected dir ino \f3yyyy\f1,
-junking entry \f3"name"\f1 in directory inode \f3xxxx\f1
+.B entry
+.I name
+.B in dir
+.I dir_ino
+.B references already connected dir ino
+.IB ino ,
+.B junking entry
+.I name
+.B in directory inode
+.I dir_ino
.IP
-The entry \f3"name"\f1 in directory inode \f3xxxx\f1 points to a
-directory inode \f3yyyy\f1 that is known to be a child of another
-directory.
+The entry
+.I name
+in directory inode
+.I dir_ino
+points to a directory inode
+.I ino
+that is known to be a child of another directory.
Therefore, the entry is invalid and is deleted.
This message refers to an entry in a small directory.
If this were a large directory, the last phrase would read
"will clear entry".
.PP
-entry references free inode \f3xxxx\f1 in directory \f3yyyy\f1,
-will clear entry
+.B entry references free inode
+.I ino
+.B in directory
+.IB dir_ino ,
+.B will clear entry
.IP
-An entry in directory inode \f3yyyy\f1 references an inode \f3xxxx\f1
-that is known to be free.
-The entry is therefore invalid and is deleted.
+An entry in directory inode
+.I dir_ino
+references an inode
+.I ino
+that is known to be free. The entry is therefore invalid and is deleted.
This message refers to a large directory.
If the directory were small, the message would read "junking entry ...".
.SH EXIT STATUS
-.I xfs_repair -n
+.B xfs_repair \-n
(no modify mode)
will return a status of 1 if filesystem corruption was detected and
0 if no filesystem corruption was detected.
-.I xfs_repair
+.B xfs_repair
run without the \-n option will always return a status code of 0.
.SH BUGS
The filesystem to be checked and repaired must have been
unmounted cleanly using normal system administration procedures
(the
-.IR umount (8)
+.BR umount (8)
command or system shutdown), not as a result of a crash or system reset.
If the filesystem has not been unmounted cleanly, mount it and unmount
it cleanly before running
-.IR xfs_repair .
+.BR xfs_repair .
.PP
-.I xfs_repair
+.B xfs_repair
does not do a thorough job on XFS extended attributes.
The structure of the attribute fork will be consistent,
but only the contents of attribute forks that will fit into
an inode are checked.
This limitation will be fixed in the future.
.PP
-The no-modify mode (\f3\-n\f1 option) is not completely
-accurate.
+The no-modify mode
+.RB ( \-n
+option) is not completely accurate.
It does not catch inconsistencies in the freespace and inode
maps, particularly lost blocks or subtly corrupted maps (trees).
.PP
.B xfs_repair
fixed and/or improved.
.SH SEE ALSO
-dd(1),
-mkfs.xfs(8),
-umount(8),
-xfs_check(8),
-xfs_metadump(8),
-xfs(5).
+.BR dd (1),
+.BR mkfs.xfs (8),
+.BR umount (8),
+.BR xfs_check (8),
+.BR xfs_metadump (8),
+.BR xfs (5).
LTCOMMAND = xfs_repair
HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
- dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \
- progress.h scan.h versions.h prefetch.h threads.h
+ dir2.h err_protos.h globals.h incore.h protos.h rt.h \
+ progress.h scan.h versions.h prefetch.h radix-tree.h threads.h
CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
- dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \
+ dinode.c dir.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
- phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \
- progress.c prefetch.c scan.c threads.c versions.c xfs_repair.c
+ phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
+ progress.c prefetch.c radix-tree.c rt.c sb.c scan.c threads.c \
+ versions.c xfs_repair.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBPTHREAD) $(LIBRT)
LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG)
# -DXR_BLD_INO_TRACE building on-disk inode allocation btrees
# -DXR_BLD_ADD_EXTENT track phase 5 block extent creation
# -DXR_BCKPTR_DBG parent list debugging info
+# -DXR_PF_TRACE prefetch trace
#
#CFLAGS += ...
#include "err_protos.h"
#include "dir.h"
#include "dinode.h"
-#include "prefetch.h"
-#include "threads.h"
#include "versions.h"
+#include "prefetch.h"
#include "progress.h"
/*
if (check_inode_block(mp, ino) == 0)
return(0);
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
+
switch (state = get_agbno_state(mp, agno, agbno)) {
case XR_E_INO:
do_warn(
_("uncertain inode block %d/%d already known\n"),
agno, agbno);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
break;
case XR_E_UNKNOWN:
case XR_E_FREE1:
case XR_E_FREE:
set_agbno_state(mp, agno, agbno, XR_E_INO);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
break;
case XR_E_MULT:
case XR_E_INUSE:
_("inode block %d/%d multiply claimed, (state %d)\n"),
agno, agbno, state);
set_agbno_state(mp, agno, agbno, XR_E_MULT);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
return(0);
default:
do_warn(
_("inode block %d/%d bad state, (state %d)\n"),
agno, agbno, state);
set_agbno_state(mp, agno, agbno, XR_E_INO);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
break;
}
+ pthread_mutex_unlock(&ag_locks[agno]);
+
start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
* user data -- we're probably here as a result of a directory
* entry or an iunlinked pointer
*/
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
for (j = 0, cur_agbno = chunk_start_agbno;
cur_agbno < chunk_stop_agbno; cur_agbno++) {
switch (state = get_agbno_state(mp, agno, cur_agbno)) {
}
if (j) {
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
return(0);
}
}
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
/*
* ok, chunk is good. put the record into the tree if required,
set_inode_used(irec_p, agino - start_agino);
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
+
for (cur_agbno = chunk_start_agbno;
cur_agbno < chunk_stop_agbno; cur_agbno++) {
switch (state = get_agbno_state(mp, agno, cur_agbno)) {
break;
}
}
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
return(ino_cnt);
}
*
* *bogus is set to 1 if the entire set of inodes is bad.
*/
+
/* ARGSUSED */
-int
-process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
- ino_tree_node_t *first_irec, int ino_discovery,
- int check_dups, int extra_attr_check, int *bogus)
+static int
+process_inode_chunk(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno,
+ int num_inos,
+ ino_tree_node_t *first_irec,
+ int ino_discovery,
+ int check_dups,
+ int extra_attr_check,
+ int *bogus)
{
xfs_ino_t parent;
ino_tree_node_t *ino_rec;
- xfs_buf_t *bp;
+ xfs_buf_t **bplist;
xfs_dinode_t *dino;
int icnt;
int status;
int is_used;
int state;
- int done;
int ino_dirty;
int irec_offset;
int ibuf_offset;
int dirty = 0;
int cleared = 0;
int isa_dir = 0;
+ int blks_per_cluster;
+ int cluster_count;
+ int bp_index;
+ int cluster_offset;
ASSERT(first_irec != NULL);
ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
*bogus = 0;
ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+ if (blks_per_cluster == 0)
+ blks_per_cluster = 1;
+ cluster_count = XFS_INODES_PER_CHUNK / inodes_per_cluster;
+ ASSERT(cluster_count > 0);
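+	/*
+	 * e.g. with 4 KiB blocks, 256-byte inodes and 8 KiB inode
+	 * clusters: blks_per_cluster = 2, inodes_per_cluster = 32,
+	 * so a 64-inode chunk is read as cluster_count = 2 buffers.
+	 */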
+
/*
* get all blocks required to read in this chunk (may wind up
* having to process more chunks in a multi-chunk per block fs)
*/
agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
- bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
- XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
- if (!bp) {
- do_warn(_("cannot read inode %llu, disk block %lld, cnt %d\n"),
- XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
- XFS_AGB_TO_DADDR(mp, agno, agbno),
- (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
- return(1);
- }
-
/*
* set up first irec
*/
ino_rec = first_irec;
+
+ bplist = malloc(cluster_count * sizeof(xfs_buf_t *));
+ if (bplist == NULL)
+		do_error(_("failed to allocate %zu bytes of memory\n"),
+			cluster_count * sizeof(xfs_buf_t *));
+
+ for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+#ifdef XR_PF_TRACE
+ pftrace("about to read off %llu in AG %d",
+ (long long)XFS_AGB_TO_DADDR(mp, agno, agbno), agno);
+#endif
+ bplist[bp_index] = libxfs_readbuf(mp->m_dev,
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ XFS_FSB_TO_BB(mp, blks_per_cluster), 0);
+ if (!bplist[bp_index]) {
+ do_warn(_("cannot read inode %llu, disk block %lld, cnt %d\n"),
+ XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
+ XFS_AGB_TO_DADDR(mp, agno, agbno),
+ (int)XFS_FSB_TO_BB(mp, blks_per_cluster));
+ while (bp_index > 0) {
+ bp_index--;
+ libxfs_putbuf(bplist[bp_index]);
+ }
+ free(bplist);
+ return(1);
+ }
+ agbno += blks_per_cluster;
+
+#ifdef XR_PF_TRACE
+ pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
+ (long long)XFS_BUF_ADDR(bplist[bp_index]),
+ XFS_BUF_COUNT(bplist[bp_index]), agno);
+#endif
+ }
+ agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
+
/*
* initialize counters
*/
irec_offset = 0;
ibuf_offset = 0;
+ cluster_offset = 0;
icnt = 0;
status = 0;
- done = 0;
+ bp_index = 0;
/*
* verify inode chunk if necessary
*/
if (ino_discovery) {
- while (!done) {
+ for (;;) {
/*
* make inode pointer
*/
- dino = XFS_MAKE_IPTR(mp, bp, icnt);
+ dino = XFS_MAKE_IPTR(mp, bplist[bp_index], cluster_offset);
agino = irec_offset + ino_rec->ino_startnum;
/*
irec_offset++;
icnt++;
+ cluster_offset++;
if (icnt == XFS_IALLOC_INODES(mp) &&
irec_offset == XFS_INODES_PER_CHUNK) {
* done! - finished up irec and block
* simultaneously
*/
- libxfs_putbuf(bp);
- done = 1;
break;
} else if (irec_offset == XFS_INODES_PER_CHUNK) {
/*
ASSERT(ino_rec->ino_startnum == agino + 1);
irec_offset = 0;
}
+ if (cluster_offset == inodes_per_cluster) {
+ bp_index++;
+ cluster_offset = 0;
+ }
}
/*
*/
if (!status) {
*bogus = 1;
- if (!done) /* already free'd */
- libxfs_putbuf(bp);
+ for (bp_index = 0; bp_index < cluster_count; bp_index++)
+ libxfs_putbuf(bplist[bp_index]);
+ free(bplist);
return(0);
}
ino_rec = first_irec;
irec_offset = 0;
- ibuf_offset = 0;
+ cluster_offset = 0;
+ bp_index = 0;
icnt = 0;
status = 0;
- done = 0;
-
- /* nathans TODO ... memory leak here?: */
-
- /*
- * get first block
- */
- bp = libxfs_readbuf(mp->m_dev,
- XFS_AGB_TO_DADDR(mp, agno, agbno),
- XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
- if (!bp) {
- do_warn(_("can't read inode %llu, disk block %lld, "
- "cnt %d\n"), XFS_AGINO_TO_INO(mp, agno, agino),
- XFS_AGB_TO_DADDR(mp, agno, agbno),
- (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
- return(1);
- }
}
/*
* mark block as an inode block in the incore bitmap
*/
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
switch (state = get_agbno_state(mp, agno, agbno)) {
- case XR_E_INO: /* already marked */
- break;
- case XR_E_UNKNOWN:
- case XR_E_FREE:
- case XR_E_FREE1:
- set_agbno_state(mp, agno, agbno, XR_E_INO);
- break;
- case XR_E_BAD_STATE:
- do_error(_("bad state in block map %d\n"), state);
- break;
- default:
- set_agbno_state(mp, agno, agbno, XR_E_MULT);
- do_warn(_("inode block %llu multiply claimed, state was %d\n"),
- XFS_AGB_TO_FSB(mp, agno, agbno), state);
- break;
+ case XR_E_INO: /* already marked */
+ break;
+ case XR_E_UNKNOWN:
+ case XR_E_FREE:
+ case XR_E_FREE1:
+ set_agbno_state(mp, agno, agbno, XR_E_INO);
+ break;
+ case XR_E_BAD_STATE:
+ do_error(_("bad state in block map %d\n"), state);
+ break;
+ default:
+ set_agbno_state(mp, agno, agbno, XR_E_MULT);
+ do_warn(_("inode block %llu multiply claimed, state was %d\n"),
+ XFS_AGB_TO_FSB(mp, agno, agbno), state);
+ break;
}
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
- while (!done) {
+ for (;;) {
/*
* make inode pointer
*/
- dino = XFS_MAKE_IPTR(mp, bp, icnt);
+ dino = XFS_MAKE_IPTR(mp, bplist[bp_index], cluster_offset);
agino = irec_offset + ino_rec->ino_startnum;
is_used = 3;
irec_offset++;
ibuf_offset++;
icnt++;
+ cluster_offset++;
if (icnt == XFS_IALLOC_INODES(mp) &&
irec_offset == XFS_INODES_PER_CHUNK) {
/*
* done! - finished up irec and block simultaneously
*/
- if (dirty && !no_modify)
- libxfs_writebuf(bp, 0);
- else
- libxfs_putbuf(bp);
-
- done = 1;
+ for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+#ifdef XR_PF_TRACE
+ pftrace("put/writebuf %p (%llu) in AG %d", bplist[bp_index],
+ (long long)XFS_BUF_ADDR(bplist[bp_index]), agno);
+#endif
+ if (dirty && !no_modify)
+ libxfs_writebuf(bplist[bp_index], 0);
+ else
+ libxfs_putbuf(bplist[bp_index]);
+ }
+ free(bplist);
break;
} else if (ibuf_offset == mp->m_sb.sb_inopblock) {
/*
ibuf_offset = 0;
agbno++;
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
switch (state = get_agbno_state(mp, agno, agbno)) {
case XR_E_INO: /* already marked */
break;
XFS_AGB_TO_FSB(mp, agno, agbno), state);
break;
}
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+ pthread_mutex_unlock(&ag_locks[agno]);
} else if (irec_offset == XFS_INODES_PER_CHUNK) {
/*
ASSERT(ino_rec->ino_startnum == agino + 1);
irec_offset = 0;
}
+ if (cluster_offset == inodes_per_cluster) {
+ bp_index++;
+ cluster_offset = 0;
+ }
}
return(0);
}
* phase 4 after we've run through and set the bitmap once.
*/
void
-process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
- int ino_discovery, int check_dups, int extra_attr_check)
+process_aginodes(
+ xfs_mount_t *mp,
+ prefetch_args_t *pf_args,
+ xfs_agnumber_t agno,
+ int ino_discovery,
+ int check_dups,
+ int extra_attr_check)
{
- int num_inos, bogus;
- ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
- ino_tree_node_t *ino_ra;
-
- ino_ra = do_prefetch ? prefetch_inode_chunks(mp, agno, NULL) : NULL;
-
+ int num_inos, bogus;
+ ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
+#ifdef XR_PF_TRACE
+ int count;
+#endif
first_ino_rec = ino_rec = findfirst_inode_rec(agno);
+
while (ino_rec != NULL) {
/*
* paranoia - step through inode records until we step
*/
num_inos = XFS_INODES_PER_CHUNK;
while (num_inos < XFS_IALLOC_INODES(mp) && ino_rec != NULL) {
- ASSERT(ino_rec != NULL);
/*
* inode chunks will always be aligned and sized
* correctly
ASSERT(num_inos == XFS_IALLOC_INODES(mp));
- if (do_prefetch && ino_ra && (first_ino_rec->ino_startnum >= ino_ra->ino_startnum))
- ino_ra = prefetch_inode_chunks(mp, agno, ino_ra);
+ if (pf_args) {
+ sem_post(&pf_args->ra_count);
+#ifdef XR_PF_TRACE
+ sem_getvalue(&pf_args->ra_count, &count);
+ pftrace("processing inode chunk %p in AG %d (sem count = %d)",
+ first_ino_rec, agno, count);
+#endif
+ }
if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
- ino_discovery, check_dups, extra_attr_check, &bogus)) {
+ ino_discovery, check_dups, extra_attr_check,
+ &bogus)) {
/* XXX - i/o error, we've got a problem */
abort();
}
return(NULLDFSBNO);
}
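The sem_post() added to process_aginodes() above is one half of a producer/consumer throttle: the prefetch thread spends a read-ahead credit for each inode chunk it reads, and the processing side hands a credit back for each chunk it consumes, so prefetch stays a bounded distance ahead of processing. A minimal sketch of that idea follows; MAX_AHEAD, read_next_chunk() and process_next_chunk() are illustrative stand-ins, not names from this patch.

#include <pthread.h>
#include <semaphore.h>

#define MAX_AHEAD	8		/* prefetch at most this many chunks ahead */

static sem_t	ra_count;		/* remaining read-ahead credits */

extern int	read_next_chunk(void);		/* hypothetical I/O producer */
extern int	process_next_chunk(void);	/* hypothetical consumer */

static void *
prefetch_thread(void *arg)
{
	/* each read spends one credit; blocks once MAX_AHEAD chunks ahead */
	while (sem_wait(&ra_count) == 0 && read_next_chunk())
		;
	return NULL;
}

static void
run_pipeline(void)
{
	pthread_t	tid;

	sem_init(&ra_count, 0, MAX_AHEAD);
	pthread_create(&tid, NULL, prefetch_thread, NULL);
	while (process_next_chunk())
		sem_post(&ra_count);	/* hand one credit back per chunk */
	sem_post(&ra_count);		/* let the reader see end-of-work */
	pthread_join(tid, NULL);
}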
-/*
- * process_bmbt_reclist_int is the most compute intensive
- * function in repair. The following macros reduce the
- * the large number of lock/unlock steps it would otherwise
- * call.
- */
-#define PROCESS_BMBT_DECL(type, var) type var
-#define PROCESS_BMBT_LOCK(agno) \
- if (do_parallel && (agno != locked_agno)) { \
- if (locked_agno != -1) /* release old ag lock */ \
- PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]); \
- PREPAIR_RW_WRITE_LOCK_NOTEST(&per_ag_lock[agno]); \
- locked_agno = agno; \
+static int
+process_rt_rec(
+ xfs_mount_t *mp,
+ xfs_bmbt_rec_32_t *rp,
+ xfs_ino_t ino,
+ xfs_drfsbno_t *tot,
+ int check_dups)
+{
+ xfs_dfsbno_t b;
+ xfs_drtbno_t ext;
+ xfs_dfilblks_t c; /* count */
+ xfs_dfsbno_t s; /* start */
+ xfs_dfiloff_t o; /* offset */
+ int state;
+ int flag; /* extent flag */
+ int pwe; /* partially-written extent */
+
+ convert_extent(rp, &o, &s, &c, &flag);
+
+ /*
+ * check numeric validity of the extent
+ */
+ if (s >= mp->m_sb.sb_rblocks) {
+ do_warn(_("inode %llu - bad rt extent start block number "
+ "%llu, offset %llu\n"), ino, s, o);
+ return 1;
+ }
+ if (s + c - 1 >= mp->m_sb.sb_rblocks) {
+ do_warn(_("inode %llu - bad rt extent last block number %llu, "
+ "offset %llu\n"), ino, s + c - 1, o);
+ return 1;
+ }
+ if (s + c - 1 < s) {
+ do_warn(_("inode %llu - bad rt extent overflows - start %llu, "
+ "end %llu, offset %llu\n"),
+ ino, s, s + c - 1, o);
+ return 1;
+ }
+
+ /*
+ * verify that the blocks listed in the record
+ * are multiples of an extent
+ */
+ if (XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) == 0 &&
+ (s % mp->m_sb.sb_rextsize != 0 ||
+ c % mp->m_sb.sb_rextsize != 0)) {
+ do_warn(_("malformed rt inode extent [%llu %llu] (fs rtext "
+ "size = %u)\n"), s, c, mp->m_sb.sb_rextsize);
+ return 1;
+ }
+
+ /*
+ * set the appropriate number of extents
+ */
+ for (b = s; b < s + c; b += mp->m_sb.sb_rextsize) {
+ ext = (xfs_drtbno_t) b / mp->m_sb.sb_rextsize;
+ pwe = XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && flag &&
+ (b % mp->m_sb.sb_rextsize != 0);
+
+ if (check_dups == 1) {
+ if (search_rt_dup_extent(mp, ext) && !pwe) {
+ do_warn(_("data fork in rt ino %llu claims "
+ "dup rt extent, off - %llu, "
+ "start - %llu, count %llu\n"),
+ ino, o, s, c);
+ return 1;
+ }
+ continue;
+ }
+
+ state = get_rtbno_state(mp, ext);
+
+ switch (state) {
+ case XR_E_FREE:
+ case XR_E_UNKNOWN:
+ set_rtbno_state(mp, ext, XR_E_INUSE);
+ break;
+
+ case XR_E_BAD_STATE:
+ do_error(_("bad state in rt block map %llu\n"),
+ ext);
+
+ case XR_E_FS_MAP:
+ case XR_E_INO:
+ case XR_E_INUSE_FS:
+ do_error(_("data fork in rt inode %llu found "
+ "metadata block %llu in rt bmap\n"),
+ ino, ext);
+
+ case XR_E_INUSE:
+ if (pwe)
+ break;
+
+ case XR_E_MULT:
+ set_rtbno_state(mp, ext, XR_E_MULT);
+ do_warn(_("data fork in rt inode %llu claims "
+ "used rt block %llu\n"),
+ ino, ext);
+ return 1;
+
+ case XR_E_FREE1:
+ default:
+ do_error(_("illegal state %d in rt block map "
+ "%llu\n"), state, b);
+ }
}
-#define PROCESS_BMBT_UNLOCK_RETURN(val) \
- do { \
- if (locked_agno != -1) \
- PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]); \
- return (val); \
- } while (0)
+ /*
+ * bump up the block counter
+ */
+ *tot += c;
+
+ return 0;
+}
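process_rt_rec() claims realtime space in units of sb_rextsize blocks: file block b lands in rt extent b / sb_rextsize, and with the extent-flag feature an unaligned block inside an unwritten extent is treated as a partially-written extent (pwe) rather than a double claim. A small worked example of the mapping, with made-up geometry (not values from this patch):

#include <stdio.h>

int
main(void)
{
	unsigned long long	rextsize = 16;	/* assumed blocks per rt extent */
	unsigned long long	s = 40, c = 24;	/* assumed extent: blocks 40..63 */
	unsigned long long	b;

	/* stepping by rextsize visits each rt extent the range touches */
	for (b = s; b < s + c; b += rextsize)
		printf("block %llu -> rt extent %llu%s\n",
			b, b / rextsize,
			(b % rextsize) ? " (unaligned: candidate pwe)" : "");
	return 0;
}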
/*
* return 1 if inode should be cleared, 0 otherwise
int whichfork)
{
xfs_dfsbno_t b;
- xfs_drtbno_t ext;
xfs_dfilblks_t c; /* count */
xfs_dfilblks_t cp = 0; /* prev count */
xfs_dfsbno_t s; /* start */
int i;
int state;
int flag; /* extent flag */
- int pwe; /* partially-written extent */
xfs_dfsbno_t e;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
- PROCESS_BMBT_DECL
- (xfs_agnumber_t, locked_agno=-1);
+ xfs_agnumber_t locked_agno = -1;
+ int error = 1;
if (whichfork == XFS_DATA_FORK)
forkname = _("data");
else
*last_key = o;
if (i > 0 && op + cp > o) {
- do_warn(
- _("bmap rec out of order, inode %llu entry %d "
- "[o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n"),
+ do_warn(_("bmap rec out of order, inode %llu entry %d "
+ "[o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n"),
ino, i, o, s, c, i-1, op, sp, cp);
- PROCESS_BMBT_UNLOCK_RETURN(1);
+ goto done;
}
op = o;
cp = c;
* check numeric validity of the extent
*/
if (c == 0) {
- do_warn(
- _("zero length extent (off = %llu, fsbno = %llu) in ino %llu\n"),
- o, s, ino);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- if (type == XR_INO_RTDATA) {
- if (s >= mp->m_sb.sb_rblocks) {
- do_warn(
- _("inode %llu - bad rt extent start block number %llu, offset %llu\n"),
- ino, s, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- if (s + c - 1 >= mp->m_sb.sb_rblocks) {
- do_warn(
- _("inode %llu - bad rt extent last block number %llu, offset %llu\n"),
- ino, s + c - 1, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- if (s + c - 1 < s) {
- do_warn(
- _("inode %llu - bad rt extent overflows - start %llu, end %llu, "
- "offset %llu\n"),
- ino, s, s + c - 1, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- } else {
- switch (verify_dfsbno_range(mp, s, c)) {
- case XR_DFSBNORANGE_VALID:
- break;
- case XR_DFSBNORANGE_BADSTART:
- do_warn(
- _("inode %llu - bad extent starting block number %llu, offset %llu\n"),
- ino, s, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- case XR_DFSBNORANGE_BADEND:
- do_warn(
- _("inode %llu - bad extent last block number %llu, offset %llu\n"),
- ino, s + c - 1, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- case XR_DFSBNORANGE_OVERFLOW:
- do_warn(
-
- _("inode %llu - bad extent overflows - start %llu, end %llu, "
- "offset %llu\n"),
- ino, s, s + c - 1, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- if (o >= fs_max_file_offset) {
- do_warn(
- _("inode %llu - extent offset too large - start %llu, count %llu, "
- "offset %llu\n"),
- ino, s, c, o);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
+ do_warn(_("zero length extent (off = %llu, "
+ "fsbno = %llu) in ino %llu\n"), o, s, ino);
+ goto done;
}
- /*
- * realtime file data fork
- */
- if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK) {
+ if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK) {
/*
- * XXX - verify that the blocks listed in the record
- * are multiples of an extent
+ * realtime bitmaps don't use AG locks, so returning
+ * immediately is fine for this code path.
*/
- if (XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) == 0
- && (s % mp->m_sb.sb_rextsize != 0 ||
- c % mp->m_sb.sb_rextsize != 0)) {
- do_warn(
- _("malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n"),
- s, c, mp->m_sb.sb_rextsize);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
-
- /*
- * XXX - set the appropriate number of extents
- */
- for (b = s; b < s + c; b += mp->m_sb.sb_rextsize) {
- ext = (xfs_drtbno_t) b / mp->m_sb.sb_rextsize;
- if (XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
- flag && (b % mp->m_sb.sb_rextsize != 0)) {
- pwe = 1;
- } else {
- pwe = 0;
- }
-
- if (check_dups == 1) {
- if (search_rt_dup_extent(mp, ext) &&
- !pwe) {
- do_warn(
- _("data fork in rt ino %llu claims dup rt extent, off - %llu, "
- "start - %llu, count %llu\n"),
- ino, o, s, c);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- }
- continue;
- }
-
- state = get_rtbno_state(mp, ext);
-
- switch (state) {
- case XR_E_FREE:
-/* XXX - turn this back on after we
- run process_rtbitmap() in phase2
- do_warn(
- _("%s fork in rt ino %llu claims free rt block %llu\n"),
- forkname, ino, ext);
-*/
- /* fall through ... */
- case XR_E_UNKNOWN:
- set_rtbno_state(mp, ext, XR_E_INUSE);
- break;
- case XR_E_BAD_STATE:
- do_error(
- _("bad state in rt block map %llu\n"), ext);
- abort();
- break;
- case XR_E_FS_MAP:
- case XR_E_INO:
- case XR_E_INUSE_FS:
- do_error(
- _("%s fork in rt inode %llu found metadata block %llu in %s bmap\n"),
- forkname, ino, ext, ftype);
- case XR_E_INUSE:
- if (pwe)
- break;
- case XR_E_MULT:
- set_rtbno_state(mp, ext, XR_E_MULT);
- do_warn(
- _("%s fork in rt inode %llu claims used rt block %llu\n"),
- forkname, ino, ext);
- PROCESS_BMBT_UNLOCK_RETURN(1);
- case XR_E_FREE1:
- default:
- do_error(
- _("illegal state %d in %s block map %llu\n"),
- state, ftype, b);
- }
- }
-
- /*
- * bump up the block counter
- */
- *tot += c;
-
+ if (process_rt_rec(mp, rp, ino, tot, check_dups))
+ return 1;
/*
* skip rest of loop processing since that's
* all for regular file forks and attr forks
continue;
}
-
/*
* regular file data fork or attribute fork
*/
+ switch (verify_dfsbno_range(mp, s, c)) {
+ case XR_DFSBNORANGE_VALID:
+ break;
+
+ case XR_DFSBNORANGE_BADSTART:
+ do_warn(_("inode %llu - bad extent starting "
+ "block number %llu, offset %llu\n"),
+ ino, s, o);
+ goto done;
+
+ case XR_DFSBNORANGE_BADEND:
+ do_warn(_("inode %llu - bad extent last block "
+ "number %llu, offset %llu\n"),
+ ino, s + c - 1, o);
+ goto done;
+
+ case XR_DFSBNORANGE_OVERFLOW:
+ do_warn(_("inode %llu - bad extent overflows - "
+ "start %llu, end %llu, offset %llu\n"),
+ ino, s, s + c - 1, o);
+ goto done;
+ }
+ if (o >= fs_max_file_offset) {
+ do_warn(_("inode %llu - extent offset too large - "
+ "start %llu, count %llu, offset %llu\n"),
+ ino, s, c, o);
+ goto done;
+ }
+
if (blkmapp && *blkmapp)
blkmap_set_ext(blkmapp, o, s, c);
/*
agno = XFS_FSB_TO_AGNO(mp, s);
agbno = XFS_FSB_TO_AGBNO(mp, s);
e = s + c;
- PROCESS_BMBT_LOCK(agno);
- for (b = s; b < e; b++, agbno++) {
- if (check_dups == 1) {
- /*
- * if we're just checking the bmap for dups,
- * return if we find one, otherwise, continue
- * checking each entry without setting the
- * block bitmap
- */
+ if (agno != locked_agno) {
+ if (locked_agno != -1)
+ pthread_mutex_unlock(&ag_locks[locked_agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
+ locked_agno = agno;
+ }
+
+ if (check_dups) {
+ /*
+ * if we're just checking the bmap for dups,
+ * return if we find one, otherwise, continue
+ * checking each entry without setting the
+ * block bitmap
+ */
+ for (b = s; b < e; b++, agbno++) {
if (search_dup_extent(mp, agno, agbno)) {
- do_warn(
- _("%s fork in ino %llu claims dup extent, off - %llu, "
- "start - %llu, cnt %llu\n"),
+ do_warn(_("%s fork in ino %llu claims "
+ "dup extent, off - %llu, "
+ "start - %llu, cnt %llu\n"),
forkname, ino, o, s, c);
- PROCESS_BMBT_UNLOCK_RETURN(1);
+ goto done;
}
- continue;
- }
-
- /* FIX FOR BUG 653709 -- EKN
- * realtime attribute fork, should be valid block number
- * in regular data space, not realtime partion.
- */
- if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) {
- if (mp->m_sb.sb_agcount < agno)
- PROCESS_BMBT_UNLOCK_RETURN(1);
}
+ *tot += c;
+ continue;
+ }
- /* Process in chunks of 16 (XR_BB_UNIT/XR_BB)
+ for (b = s; b < e; b++, agbno++) {
+ /*
+ * Process in chunks of 16 (XR_BB_UNIT/XR_BB)
* for common XR_E_UNKNOWN to XR_E_INUSE transition
*/
if (((agbno & XR_BB_MASK) == 0) && ((s + c - b) >= (XR_BB_UNIT/XR_BB))) {
}
state = get_agbno_state(mp, agno, agbno);
+
switch (state) {
case XR_E_FREE:
case XR_E_FREE1:
- do_warn(
- _("%s fork in ino %llu claims free block %llu\n"),
+ do_warn(_("%s fork in ino %llu claims free "
+ "block %llu\n"),
forkname, ino, (__uint64_t) b);
/* fall through ... */
case XR_E_UNKNOWN:
set_agbno_state(mp, agno, agbno, XR_E_INUSE);
break;
+
case XR_E_BAD_STATE:
do_error(_("bad state in block map %llu\n"), b);
- abort();
- break;
+
case XR_E_FS_MAP:
case XR_E_INO:
case XR_E_INUSE_FS:
- do_warn(
- _("%s fork in inode %llu claims metadata block %llu\n"),
+ do_warn(_("%s fork in inode %llu claims "
+ "metadata block %llu\n"),
forkname, ino, (__uint64_t) b);
- PROCESS_BMBT_UNLOCK_RETURN(1);
+ goto done;
+
case XR_E_INUSE:
case XR_E_MULT:
set_agbno_state(mp, agno, agbno, XR_E_MULT);
- do_warn(
- _("%s fork in %s inode %llu claims used block %llu\n"),
+ do_warn(_("%s fork in %s inode %llu claims "
+ "used block %llu\n"),
forkname, ftype, ino, (__uint64_t) b);
- PROCESS_BMBT_UNLOCK_RETURN(1);
+ goto done;
+
default:
- do_error(
- _("illegal state %d in block map %llu\n"),
+ do_error(_("illegal state %d in block map %llu\n"),
state, b);
- abort();
}
}
*tot += c;
}
-
- PROCESS_BMBT_UNLOCK_RETURN(0);
+ error = 0;
+done:
+ if (locked_agno != -1)
+ pthread_mutex_unlock(&ag_locks[locked_agno]);
+ return error;
}
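The PROCESS_BMBT_LOCK/UNLOCK macros are replaced by an explicit locked_agno cache: all blocks of one extent (and usually of neighbouring extents) live in the same AG, so the per-AG mutex is only switched when the AG actually changes, and the done: label guarantees the lock is dropped on every exit path. A self-contained sketch of the pattern, with types and sizes assumed:

#include <pthread.h>

#define NAGS	16	/* illustrative AG count */

static pthread_mutex_t	ag_locks[NAGS];

static void
init_locks(void)
{
	int	i;

	for (i = 0; i < NAGS; i++)
		pthread_mutex_init(&ag_locks[i], NULL);
}

static void
mark_extents(const int *extent_agno, int nextents)
{
	int	i, locked_agno = -1;

	for (i = 0; i < nextents; i++) {
		int	agno = extent_agno[i];

		/* switch locks only when the AG changes */
		if (agno != locked_agno) {
			if (locked_agno != -1)
				pthread_mutex_unlock(&ag_locks[locked_agno]);
			pthread_mutex_lock(&ag_locks[agno]);
			locked_agno = agno;
		}
		/* ... update the incore block map for this extent ... */
	}
	if (locked_agno != -1)
		pthread_mutex_unlock(&ag_locks[locked_agno]);
}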
/*
#ifndef _XR_DINODE_H
#define _XR_DINODE_H
+#include "prefetch.h"
+
struct blkmap;
int
xfs_agnumber_t agno);
void
process_aginodes(xfs_mount_t *mp,
+ prefetch_args_t *pf_args,
xfs_agnumber_t agno,
int check_dirs,
int check_dups,
#include "dinode.h"
#include "dir.h"
#include "bmap.h"
-#include "prefetch.h"
#if XFS_DIR_LEAF_MAPSIZE >= XFS_ATTR_LEAF_MAPSIZE
#define XR_DA_LEAF_MAPSIZE XFS_DIR_LEAF_MAPSIZE
node = NULL;
da_cursor->active = 0;
- if (do_prefetch && (whichfork == XFS_DATA_FORK))
- prefetch_dir1(mp, bno, da_cursor);
-
do {
/*
* read in each block along the way and set up cursor
#include "dir2.h"
#include "bmap.h"
#include "prefetch.h"
+#include "progress.h"
/*
* Tag bad directory entries with this.
xfs_buf_t *bparray[4];
xfs_buf_t **bplist;
xfs_dabuf_t *dabuf;
- int i;
+ int i, j;
int off;
+ int nblocks;
+
+ /*
+ * due to limitations in libxfs_cache, we need to read the
+ * blocks in fsblock size chunks
+ */
+
+ for (i = 0, nblocks = 0; i < nex; i++)
+ nblocks += bmp[i].blockcount;
- if (nex > (sizeof(bparray)/sizeof(xfs_buf_t *))) {
+ if (nblocks > (sizeof(bparray)/sizeof(xfs_buf_t *))) {
- bplist = calloc(nex, sizeof(*bplist));
+ bplist = calloc(nblocks, sizeof(*bplist));
if (bplist == NULL) {
do_error(_("couldn't malloc dir2 buffer list\n"));
/* common case avoids calloc/free */
bplist = bparray;
}
- for (i = 0; i < nex; i++) {
- bplist[i] = libxfs_readbuf(mp->m_dev,
- XFS_FSB_TO_DADDR(mp, bmp[i].startblock),
- XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0);
- if (!bplist[i])
- goto failed;
+ for (i = 0, j = 0; j < nex; j++) {
+ xfs_dfsbno_t bno;
+ int c;
+
+ bno = bmp[j].startblock;
+ for (c = 0; c < bmp[j].blockcount; c++, bno++) {
+#ifdef XR_PF_TRACE
+ pftrace("about to read off %llu",
+ (long long)XFS_FSB_TO_DADDR(mp, bno));
+#endif
+ bplist[i] = libxfs_readbuf(mp->m_dev,
+ XFS_FSB_TO_DADDR(mp, bno),
+ XFS_FSB_TO_BB(mp, 1), 0);
+ if (!bplist[i])
+ goto failed;
+#ifdef XR_PF_TRACE
+ pftrace("readbuf %p (%llu, %d)", bplist[i],
+ (long long)XFS_BUF_ADDR(bplist[i]),
+ XFS_BUF_COUNT(bplist[i]));
+#endif
+ i++;
+ }
}
- dabuf = malloc(XFS_DA_BUF_SIZE(nex));
+ ASSERT(i == nblocks);
+
+ dabuf = malloc(XFS_DA_BUF_SIZE(nblocks));
if (dabuf == NULL) {
do_error(_("couldn't malloc dir2 buffer header\n"));
exit(1);
}
dabuf->dirty = 0;
- dabuf->nbuf = nex;
+ dabuf->nbuf = nblocks;
- if (nex == 1) {
+ if (nblocks == 1) {
bp = bplist[0];
dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
dabuf->data = XFS_BUF_PTR(bp);
do_error(_("couldn't malloc dir2 buffer data\n"));
exit(1);
}
- for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) {
+ for (i = off = 0; i < nblocks; i++, off += XFS_BUF_COUNT(bp)) {
bp = bplist[i];
bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
XFS_BUF_COUNT(bp));
free(bplist);
return dabuf;
failed:
- for (i = 0; i < nex; i++)
+ for (i = 0; i < nblocks; i++)
libxfs_putbuf(bplist[i]);
if (bplist != bparray)
free(bplist);
bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
}
da_buf_done(dabuf);
- for (i = 0; i < nbuf; i++)
+ for (i = 0; i < nbuf; i++) {
+#ifdef XR_PF_TRACE
+ pftrace("putbuf %p (%llu)", bplist[i], (long long)XFS_BUF_ADDR(bplist[i]));
+#endif
libxfs_putbuf(bplist[i]);
+ }
if (bplist != &bp)
free(bplist);
}
sfp = &dip->di_u.di_dir2sf;
max_size = XFS_DFORK_DSIZE(dip, mp);
- num_entries = INT_GET(sfp->hdr.count, ARCH_CONVERT);
+ num_entries = sfp->hdr.count;
ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
offset = XFS_DIR2_DATA_FIRST_OFFSET;
bad_offset = *repair = 0;
int t;
bmap_ext_t lbmp;
- if (do_prefetch)
- prefetch_dir2(mp, blkmap);
-
*repair = *dot = *dotdot = good = 0;
*parent = NULLFSINO;
ndbno = NULLDFILOFF;
+++ /dev/null
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <libxfs.h>
-#include "dir_stack.h"
-#include "err_protos.h"
-#include "threads.h"
-
-/*
- * a directory stack for holding directories while
- * we traverse filesystem hierarchy subtrees.
- * names are kind of misleading as this is really
- * implemented as an inode stack. so sue me...
- */
-
-static dir_stack_t dirstack_freelist;
-static int dirstack_init = 0;
-static pthread_mutex_t dirstack_mutex;
-static pthread_mutexattr_t dirstack_mutexattr;
-
-
-void
-dir_stack_init(dir_stack_t *stack)
-{
- stack->cnt = 0;
- stack->head = NULL;
-
- if (dirstack_init == 0) {
- dirstack_init = 1;
- PREPAIR_MTX_ATTR_INIT(&dirstack_mutexattr);
-#ifdef PTHREAD_MUTEX_SPINBLOCK_NP
- PREPAIR_MTX_ATTR_SET(&dirstack_mutexattr, PTHREAD_MUTEX_SPINBLOCK_NP);
-#endif
- PREPAIR_MTX_LOCK_INIT(&dirstack_mutex, &dirstack_mutexattr);
- dir_stack_init(&dirstack_freelist);
- }
-
- stack->cnt = 0;
- stack->head = NULL;
-
- return;
-}
-
-static void
-dir_stack_push(dir_stack_t *stack, dir_stack_elem_t *elem)
-{
- ASSERT(stack->cnt > 0 || (stack->cnt == 0 && stack->head == NULL));
-
- elem->next = stack->head;
- stack->head = elem;
- stack->cnt++;
-
- return;
-}
-
-static dir_stack_elem_t *
-dir_stack_pop(dir_stack_t *stack)
-{
- dir_stack_elem_t *elem;
-
- if (stack->cnt == 0) {
- ASSERT(stack->head == NULL);
- return(NULL);
- }
-
- elem = stack->head;
-
- ASSERT(elem != NULL);
-
- stack->head = elem->next;
- elem->next = NULL;
- stack->cnt--;
-
- return(elem);
-}
-
-void
-push_dir(dir_stack_t *stack, xfs_ino_t ino)
-{
- dir_stack_elem_t *elem;
-
- PREPAIR_MTX_LOCK(&dirstack_mutex);
- if (dirstack_freelist.cnt == 0) {
- if ((elem = malloc(sizeof(dir_stack_elem_t))) == NULL) {
- PREPAIR_MTX_UNLOCK(&dirstack_mutex);
- do_error(
- _("couldn't malloc dir stack element, try more swap\n"));
- exit(1);
- }
- } else {
- elem = dir_stack_pop(&dirstack_freelist);
- }
- PREPAIR_MTX_UNLOCK(&dirstack_mutex);
-
- elem->ino = ino;
-
- dir_stack_push(stack, elem);
-
- return;
-}
-
-xfs_ino_t
-pop_dir(dir_stack_t *stack)
-{
- dir_stack_elem_t *elem;
- xfs_ino_t ino;
-
- elem = dir_stack_pop(stack);
-
- if (elem == NULL)
- return(NULLFSINO);
-
- ino = elem->ino;
- elem->ino = NULLFSINO;
-
- PREPAIR_MTX_LOCK(&dirstack_mutex);
- dir_stack_push(&dirstack_freelist, elem);
- PREPAIR_MTX_UNLOCK(&dirstack_mutex);
-
- return(ino);
-}
+++ /dev/null
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-typedef struct dir_stack_elem {
- xfs_ino_t ino;
- struct dir_stack_elem *next;
-} dir_stack_elem_t;
-
-typedef struct dir_stack {
- int cnt;
- dir_stack_elem_t *head;
-} dir_stack_t;
-
-
-void dir_stack_init(dir_stack_t *stack);
-
-void push_dir(dir_stack_t *stack, xfs_ino_t ino);
-xfs_ino_t pop_dir(dir_stack_t *stack);
/* configuration vars -- fs geometry dependent */
EXTERN int inodes_per_block;
-EXTERN int inodes_per_cluster; /* inodes per inode buffer */
+EXTERN int inodes_per_cluster;
EXTERN unsigned int glob_agcount;
EXTERN int chunks_pblock; /* # of 64-ino chunks per allocation */
EXTERN int max_symlink_blocks;
extern size_t ts_dir_freemap_size;
extern size_t ts_attr_freemap_size;
-EXTERN pthread_rwlock_t *per_ag_lock;
+EXTERN pthread_mutex_t *ag_locks;
-EXTERN int report_interval;
-EXTERN __uint64_t *prog_rpt_done;
+EXTERN int report_interval;
+EXTERN __uint64_t *prog_rpt_done;
+
+#ifdef XR_PF_TRACE
+EXTERN FILE *pf_trace_file;
+#endif
EXTERN int ag_stride;
+EXTERN int thread_count;
#endif /* _XFS_REPAIR_GLOBAL_H */
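pf_trace_file backs the XR_PF_TRACE instrumentation threaded through the patch. The actual pftrace() definition is not shown in these hunks; a plausible shape for such a compile-time trace hook would be the following - an assumption for illustration, not the patch's code:

#ifdef XR_PF_TRACE
#include <stdio.h>
extern FILE	*pf_trace_file;
#define pftrace(fmt, args...) \
	fprintf(pf_trace_file, "%s: " fmt "\n", __FUNCTION__, ## args)
#else
#define pftrace(fmt, args...)	do { } while (0)
#endif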
size_t size = 0;
ba_bmap = (__uint64_t**)malloc(agno*sizeof(__uint64_t *));
- if (!ba_bmap) {
+ if (!ba_bmap)
do_error(_("couldn't allocate block map pointers\n"));
- return;
- }
- PREPAIR_RW_LOCK_ALLOC(per_ag_lock, agno);
+ ag_locks = malloc(agno * sizeof(pthread_mutex_t));
+ if (!ag_locks)
+ do_error(_("couldn't allocate block map locks\n"));
+
for (i = 0; i < agno; i++) {
size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB),
sizeof(__uint64_t));
return;
}
bzero(ba_bmap[i], size);
- PREPAIR_RW_LOCK_INIT(&per_ag_lock[i], NULL);
+ pthread_mutex_init(&ag_locks[i], NULL);
}
if (rtblocks == 0) {
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#ifndef XFS_REPAIR_INCORE_H
+#define XFS_REPAIR_INCORE_H
+
+#include "avl.h"
/*
* contains definition information. implementation (code)
* is spread out in separate files.
#define add_inode_refchecked(ino, ino_rec, ino_offset) \
XFS_INOPROC_SET_PROC((ino_rec), (ino_offset))
#define is_inode_refchecked(ino, ino_rec, ino_offset) \
- (XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL ? 0 : 1)
+ (XFS_INOPROC_IS_PROC(ino_rec, ino_offset) != 0LL)
#else
void add_inode_refchecked(xfs_ino_t ino,
ino_tree_node_t *ino_rec, int ino_offset);
} bmap_cursor_t;
void init_bm_cursor(bmap_cursor_t *cursor, int num_level);
+
+#endif /* XFS_REPAIR_INCORE_H */
/*
* locks.
*/
-static pthread_rwlock_t ext_flist_lock;
-static pthread_rwlock_t rt_ext_tree_lock;
-static pthread_rwlock_t rt_ext_flist_lock;
+static pthread_mutex_t ext_flist_lock;
+static pthread_mutex_t rt_ext_tree_lock;
+static pthread_mutex_t rt_ext_flist_lock;
/*
* extent tree stuff is avl trees of duplicate extents,
extent_tree_node_t *new;
extent_alloc_rec_t *rec;
- PREPAIR_RW_WRITE_LOCK(&ext_flist_lock);
+ pthread_mutex_lock(&ext_flist_lock);
if (ext_flist.cnt == 0) {
ASSERT(ext_flist.list == NULL);
ext_flist.list = (extent_tree_node_t *) new->avl_node.avl_nextino;
ext_flist.cnt--;
new->avl_node.avl_nextino = NULL;
- PREPAIR_RW_UNLOCK(&ext_flist_lock);
+ pthread_mutex_unlock(&ext_flist_lock);
/* initialize node */
void
release_extent_tree_node(extent_tree_node_t *node)
{
- PREPAIR_RW_WRITE_LOCK(&ext_flist_lock);
+ pthread_mutex_lock(&ext_flist_lock);
node->avl_node.avl_nextino = (avlnode_t *) ext_flist.list;
ext_flist.list = node;
ext_flist.cnt++;
- PREPAIR_RW_UNLOCK(&ext_flist_lock);
+ pthread_mutex_unlock(&ext_flist_lock);
return;
}
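The freelist locks become plain mutexes because both paths that used them - allocating a node and releasing one - took the old rwlock in write mode anyway, so the rwlock bought no read concurrency. A self-contained sketch of the mutex-protected freelist pattern, with an illustrative node type:

#include <pthread.h>
#include <stdlib.h>

typedef struct node {
	struct node	*next;
} node_t;

static node_t		*free_list;
static pthread_mutex_t	flist_lock = PTHREAD_MUTEX_INITIALIZER;

static node_t *
get_node(void)
{
	node_t	*n;

	pthread_mutex_lock(&flist_lock);
	if ((n = free_list) != NULL)
		free_list = n->next;
	pthread_mutex_unlock(&flist_lock);
	/* fall back to the heap when the freelist is empty */
	return n ? n : malloc(sizeof(*n));
}

static void
put_node(node_t *n)
{
	pthread_mutex_lock(&flist_lock);
	n->next = free_list;
	free_list = n;
	pthread_mutex_unlock(&flist_lock);
}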
* avl tree code doesn't handle dups so insert
* onto linked list in increasing startblock order
*
- * when called from mk_incore_fstree,
+ * when called from mk_incore_fstree,
* startblock is in increasing order.
* current is an "anchor" node.
* quick check if the new ext goes to the end.
- * if so, append at the end, using the last field
+ * if so, append at the end, using the last field
* of the "anchor".
*/
ASSERT(current->last != NULL);
return;
}
- /*
+ /*
* scan, to find the proper location for new entry.
* this scan is *very* expensive and gets worse
* with increasing entries.
rt_extent_tree_node_t *new;
rt_extent_alloc_rec_t *rec;
- PREPAIR_RW_WRITE_LOCK(&rt_ext_flist_lock);
+ pthread_mutex_lock(&rt_ext_flist_lock);
if (rt_ext_flist.cnt == 0) {
ASSERT(rt_ext_flist.list == NULL);
rt_ext_flist.list = (rt_extent_tree_node_t *) new->avl_node.avl_nextino;
rt_ext_flist.cnt--;
new->avl_node.avl_nextino = NULL;
- PREPAIR_RW_UNLOCK(&rt_ext_flist_lock);
+ pthread_mutex_unlock(&rt_ext_flist_lock);
/* initialize node */
xfs_drtbno_t new_startblock;
xfs_extlen_t new_blockcount;
- PREPAIR_RW_WRITE_LOCK(&rt_ext_tree_lock);
+ pthread_mutex_lock(&rt_ext_tree_lock);
avl64_findranges(rt_ext_tree_ptr, startblock - 1,
startblock + blockcount + 1,
(avl64node_t **) &first, (avl64node_t **) &last);
do_error(_("duplicate extent range\n"));
}
- PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
+ pthread_mutex_unlock(&rt_ext_tree_lock);
return;
}
*/
if (ext->rt_startblock <= startblock &&
ext->rt_blockcount >= blockcount) {
- PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
+ pthread_mutex_unlock(&rt_ext_tree_lock);
return;
}
/*
do_error(_("duplicate extent range\n"));
}
- PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
+ pthread_mutex_unlock(&rt_ext_tree_lock);
return;
}
{
int ret;
- PREPAIR_RW_READ_LOCK(&rt_ext_tree_lock);
+ pthread_mutex_lock(&rt_ext_tree_lock);
if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL)
ret = 1;
else
ret = 0;
- PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
+ pthread_mutex_unlock(&rt_ext_tree_lock);
return(ret);
}
ba_list = NULL;
rt_ba_list = NULL;
- PREPAIR_RW_LOCK_INIT(&ext_flist_lock, NULL);
- PREPAIR_RW_LOCK_INIT(&rt_ext_tree_lock, NULL);
- PREPAIR_RW_LOCK_INIT(&rt_ext_flist_lock, NULL);
+ pthread_mutex_init(&ext_flist_lock, NULL);
+ pthread_mutex_init(&rt_ext_tree_lock, NULL);
+ pthread_mutex_init(&rt_ext_flist_lock, NULL);
if ((extent_tree_ptrs = malloc(agcount *
sizeof(avltree_desc_t *))) == NULL)
#include "threads.h"
#include "err_protos.h"
-static pthread_rwlock_t ino_flist_lock;
+static pthread_mutex_t ino_flist_lock;
extern avlnode_t *avl_firstino(avlnode_t *root);
/*
ino_tree_node_t *ino_rec;
avlnode_t *node;
- PREPAIR_RW_WRITE_LOCK(&ino_flist_lock);
+ pthread_mutex_lock(&ino_flist_lock);
if (ino_flist.cnt == 0) {
ASSERT(ino_flist.list == NULL);
ino_flist.cnt--;
node = &ino_rec->avl_node;
node->avl_nextino = node->avl_forw = node->avl_back = NULL;
- PREPAIR_RW_UNLOCK(&ino_flist_lock);
+ pthread_mutex_unlock(&ino_flist_lock);
/* initialize node */
ino_rec->avl_node.avl_forw = NULL;
ino_rec->avl_node.avl_back = NULL;
- PREPAIR_RW_WRITE_LOCK(&ino_flist_lock);
+ pthread_mutex_lock(&ino_flist_lock);
if (ino_flist.list != NULL) {
ASSERT(ino_flist.cnt > 0);
ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list;
free(ino_rec->ino_un.ex_data);
}
- PREPAIR_RW_UNLOCK(&ino_flist_lock);
-
- return;
+ pthread_mutex_unlock(&ino_flist_lock);
}
/*
* set cache entry
*/
last_rec[agno] = ino_rec;
-
- return;
}
/*
clear_uncertain_ino_cache(xfs_agnumber_t agno)
{
last_rec[agno] = NULL;
-
- return;
}
free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
{
free_ino_tree_node(ino_rec);
-
- return;
}
void
avl_findranges(inode_tree_ptrs[agno], start_ino,
end_ino, (avlnode_t **) first, (avlnode_t **) last);
- return;
}
/*
#endif
irec->ino_un.plist->pentries[target] = parent;
irec->ino_un.plist->pmask |= (1LL << offset);
-
- return;
}
xfs_ino_t
int i;
int agcount = mp->m_sb.sb_agcount;
- PREPAIR_RW_LOCK_INIT(&ino_flist_lock, NULL);
+ pthread_mutex_init(&ino_flist_lock, NULL);
if ((inode_tree_ptrs = malloc(agcount *
sizeof(avltree_desc_t *))) == NULL)
do_error(_("couldn't malloc inode tree descriptor table\n"));
bzero(last_rec, sizeof(ino_tree_node_t *) * agcount);
full_ino_ex_data = 0;
-
- return;
}
#ifdef XR_INO_REF_DEBUG
XFS_INOPROC_SET_PROC((ino_rec), (ino_offset));
ASSERT(is_inode_refchecked(ino, ino_rec, ino_offset));
-
- return;
}
int
#include "protos.h"
#include "err_protos.h"
#include "pthread.h"
+#include "avl.h"
+#include "dir.h"
+#include "incore.h"
#include "prefetch.h"
+#include "radix-tree.h"
#include <sys/resource.h>
static pthread_key_t dirbuf_key;
ts_create();
ts_init();
increase_rlimit();
- if (do_prefetch) {
- do_prefetch = libxfs_lio_init();
- if (do_prefetch)
- libxfs_lio_allocate();
- }
+ radix_tree_init();
}
#include "dinode.h"
#include "threads.h"
#include "progress.h"
+#include "prefetch.h"
/*
* walks an unlinked list, returns 1 on an error (bogus pointer) or
add_aginode_uncertain(agno, current_ino, 1);
agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
- PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+ pthread_mutex_lock(&ag_locks[agno]);
switch (state = get_agbno_state(mp,
agno, agbno)) {
case XR_E_UNKNOWN:
case XR_E_FREE1:
set_agbno_state(mp, agno, agbno,
XR_E_INO);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
break;
case XR_E_BAD_STATE:
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
do_error(_(
"bad state in block map %d\n"),
state);
- abort();
break;
default:
/*
*/
set_agbno_state(mp, agno, agbno,
XR_E_INO);
- PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
break;
}
+ pthread_mutex_unlock(&ag_locks[agno]);
}
current_ino = dip->di_next_unlinked;
} else {
libxfs_putbuf(bp);
}
-void
-parallel_p3_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+static void
+process_ag_func(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
{
/*
* turn on directory processing (inode discovery) and
* attribute processing (extra_attr_check)
*/
+ wait_for_inode_prefetch(arg);
do_log(_(" - agno = %d\n"), agno);
- process_aginodes(mp, agno, 1, 0, 1);
+ process_aginodes(wq->mp, arg, agno, 1, 0, 1);
+ cleanup_inode_prefetch(arg);
+}
+
+static void
+process_ags(
+ xfs_mount_t *mp)
+{
+ int i, j;
+ xfs_agnumber_t agno;
+ work_queue_t *queues;
+ prefetch_args_t *pf_args[2];
+
+ queues = malloc(thread_count * sizeof(work_queue_t));
+
+ if (ag_stride) {
+ /*
+ * create one worker thread for each segment of the volume
+ */
+ for (i = 0, agno = 0; i < thread_count; i++) {
+ create_work_queue(&queues[i], mp, 1);
+ pf_args[0] = NULL;
+ for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount;
+ j++, agno++) {
+ pf_args[0] = start_inode_prefetch(agno, 0, pf_args[0]);
+ queue_work(&queues[i], process_ag_func, agno, pf_args[0]);
+ }
+ }
+ /*
+ * wait for workers to complete
+ */
+ for (i = 0; i < thread_count; i++)
+ destroy_work_queue(&queues[i]);
+ } else {
+ queues[0].mp = mp;
+ pf_args[0] = start_inode_prefetch(0, 0, NULL);
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 0,
+ pf_args[i & 1]);
+ process_ag_func(&queues[0], i, pf_args[i & 1]);
+ }
+ }
+ free(queues);
}
void
phase3(xfs_mount_t *mp)
{
- int i, j;
+ int i, j;
do_log(_("Phase 3 - for each AG...\n"));
if (!no_modify)
" - process known inodes and perform inode discovery...\n"));
set_progress_msg(PROG_FMT_PROCESS_INO, (__uint64_t) mp->m_sb.sb_icount);
- if (ag_stride) {
- int steps = (mp->m_sb.sb_agcount + ag_stride - 1) / ag_stride;
- for (i = 0; i < steps; i++)
- for (j = i; j < mp->m_sb.sb_agcount; j += ag_stride)
- queue_work(parallel_p3_process_aginodes, mp, j);
- } else {
- for (i = 0; i < mp->m_sb.sb_agcount; i++)
- parallel_p3_process_aginodes(mp, i);
- }
- wait_for_workers();
+
+ process_ags(mp);
+
print_final_rpt();
/*
#include "dir2.h"
#include "threads.h"
#include "progress.h"
+#include "prefetch.h"
/*
}
-void
-parallel_p4_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+static void
+process_ag_func(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
{
+ wait_for_inode_prefetch(arg);
do_log(_(" - agno = %d\n"), agno);
- process_aginodes(mp, agno, 0, 1, 0);
+ process_aginodes(wq->mp, arg, agno, 0, 1, 0);
+ cleanup_inode_prefetch(arg);
/*
* now recycle the per-AG duplicate extent records
release_dup_extent_tree(agno);
}
+static void
+process_ags(
+ xfs_mount_t *mp)
+{
+ int i, j;
+ work_queue_t *queues;
+ prefetch_args_t *pf_args[2];
+
+ queues = malloc(thread_count * sizeof(work_queue_t));
+
+ if (!libxfs_bcache_overflowed()) {
+ queues[0].mp = mp;
+ create_work_queue(&queues[0], mp, libxfs_nproc());
+ for (i = 0; i < mp->m_sb.sb_agcount; i++)
+ queue_work(&queues[0], process_ag_func, i, NULL);
+ destroy_work_queue(&queues[0]);
+ } else {
+ if (ag_stride) {
+ /*
+ * create one worker thread for each segment of the volume
+ */
+ for (i = 0; i < thread_count; i++) {
+ create_work_queue(&queues[i], mp, 1);
+ pf_args[0] = NULL;
+ for (j = i; j < mp->m_sb.sb_agcount; j += ag_stride) {
+ pf_args[0] = start_inode_prefetch(j, 0, pf_args[0]);
+ queue_work(&queues[i], process_ag_func, j, pf_args[0]);
+ }
+ }
+ /*
+ * wait for workers to complete
+ */
+ for (i = 0; i < thread_count; i++)
+ destroy_work_queue(&queues[i]);
+ } else {
+ queues[0].mp = mp;
+ pf_args[0] = start_inode_prefetch(0, 0, NULL);
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ pf_args[(~i) & 1] = start_inode_prefetch(i + 1,
+ 0, pf_args[i & 1]);
+ process_ag_func(&queues[0], i, pf_args[i & 1]);
+ }
+ }
+ }
+ free(queues);
+}
+
+
void
phase4(xfs_mount_t *mp)
{
* and attribute processing is turned OFF since we did that
* already in phase 3.
*/
- if (ag_stride) {
- int steps = (mp->m_sb.sb_agcount + ag_stride - 1) / ag_stride;
- for (i = 0; i < steps; i++)
- for (j = i; j < mp->m_sb.sb_agcount; j += ag_stride)
- queue_work(parallel_p4_process_aginodes, mp, j);
- } else {
- for (i = 0; i < mp->m_sb.sb_agcount; i++)
- parallel_p4_process_aginodes(mp, i);
- }
-
- wait_for_workers();
+ process_ags(mp);
print_final_rpt();
/*
set_inode_used(irec, i);
}
-void
-phase5_function(xfs_mount_t *mp, xfs_agnumber_t agno)
+static void
+phase5_func(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
{
__uint64_t num_inos;
__uint64_t num_free_inos;
void
phase5(xfs_mount_t *mp)
{
- xfs_agnumber_t agno;
+ xfs_agnumber_t agno;
do_log(_("Phase 5 - rebuild AG headers and trees...\n"));
set_progress_msg(PROG_FMT_REBUILD_AG, (__uint64_t )glob_agcount);
if (sb_fdblocks_ag == NULL)
do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));
- for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
- queue_work(phase5_function, mp, agno);
- }
- wait_for_workers();
+ for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+ phase5_func(mp, agno);
+
print_final_rpt();
/* aggregate per ag counters */
#include "incore.h"
#include "dir.h"
#include "dir2.h"
-#include "dir_stack.h"
#include "protos.h"
#include "err_protos.h"
#include "dinode.h"
#include "prefetch.h"
#include "progress.h"
+#include "threads.h"
#include "versions.h"
static struct cred zerocr;
static struct fsxattr zerofsx;
static xfs_ino_t orphanage_ino;
-static xfs_inode_t *orphanage_ip;
/*
* Data structures and routines to keep track of directory entries
int hsize;
hsize = size / (16 * 4);
- if (hsize > 1024)
- hsize = 1024;
+ if (hsize > 65536)
+ hsize = 65536;
else if (hsize < 16)
hsize = 16;
if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
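With the 65536 clamp corrected above, the hash table is sized at roughly one bucket per 64 bytes of directory data, clamped to [16, 65536]. A worked example mirroring the patch's arithmetic (standalone, values assumed): a 1 MiB directory gets 1048576 / 64 = 16384 buckets.

static int
dir_hash_size(unsigned long long dirsize)
{
	int	hsize = dirsize / (16 * 4);	/* ~1 bucket per 64 bytes */

	if (hsize > 65536)
		hsize = 65536;
	else if (hsize < 16)
		hsize = 16;
	return hsize;
}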
xfs_ino_t ino, /* inode # to be moved */
int isa_dir) /* 1 if inode is a directory */
{
+ xfs_inode_t *orphanage_ip;
xfs_ino_t entry_ino_num;
xfs_inode_t *ino_p;
xfs_trans_t *tp;
fnamelen = snprintf(fname, sizeof(fname), "%llu",
(unsigned long long)ino);
- ASSERT(orphanage_ip != NULL);
+ err = libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip, 0);
+ if (err)
+ do_error(_("%d - couldn't iget orphanage inode\n"), err);
/*
* Make sure the filename is unique in the lost+found
*/
* Returns the fsbno of the first (leftmost) block in the directory leaf.
* sets *bno to the directory block # corresponding to the returned fsbno.
*/
-xfs_dfsbno_t
+static xfs_dfsbno_t
map_first_dblock_fsbno(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_inode_t *ip,
int i;
int error;
char *ftype;
- xfs_fsblock_t fblock2;
/*
* traverse down left-side of tree until we hit the
if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
return(fsbno);
- if (do_prefetch) {
- fblock2 = NULLFSBLOCK;
- prefetch_p6_dir1(mp, ino, ip, 0, &fblock2);
- }
-
do {
/*
* walk down left side of btree, release buffers as you
*
* this routine can NOT be called if running in no modify mode
*/
-int
+static int
prune_lf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip,
xfs_dahash_t *hashval)
{
* process a leaf block, also checks for .. entry
* and corrects it to match what we think .. should be
*/
-void
+static void
lf_block_dir_entry_check(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_dir_leafblock_t *leaf,
int *dirty,
int *num_illegal,
int *need_dot,
- dir_stack_t *stack,
ino_tree_node_t *current_irec,
int current_ino_offset,
dir_hash_tab_t *hashtab,
} else if (parent == ino) {
add_inode_reached(irec, ino_offset);
add_inode_ref(current_irec, current_ino_offset);
-
- if (!do_prefetch && !is_inode_refchecked(lino, irec, ino_offset))
- push_dir(stack, lino);
} else {
junkit = 1;
do_warn(
* happen in file blocks. the inode size and other core info
* is already correct, it's just the leaf entries that get altered.
*/
-void
+static void
longform_dir_entry_check(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_inode_t *ip,
int *num_illegal,
int *need_dot,
- dir_stack_t *stack,
ino_tree_node_t *irec,
int ino_offset,
dir_hash_tab_t *hashtab)
if (!skipit)
lf_block_dir_entry_check(mp, ino, leaf, &dirty,
- num_illegal, need_dot, stack, irec,
+ num_illegal, need_dot, irec,
ino_offset, hashtab, da_bno);
da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
xfs_fileoff_t lastblock;
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
- xfs_ino_t parentino;
- xfs_inode_t *pip;
+ xfs_inode_t pip;
int byhash;
dir_hash_ent_t *p;
int committed;
/*
* first attempt to locate the parent inode, if it can't be found,
- * we'll use the lost+found inode
+ * set it to the root inode and it'll be adjusted or fixed later
+ * if incorrect (the inode number here needs to be valid for the
+ * libxfs_dir2_init() call).
*/
byhash = DIR_HASH_FUNC(hashtab, libxfs_da_hashname((uchar_t*)"..", 2));
- parentino = orphanage_ino;
+ pip.i_ino = mp->m_sb.sb_rootino;
for (p = hashtab->byhash[byhash]; p; p = p->nextbyhash) {
if (p->namelen == 2 && p->name[0] == '.' && p->name[1] == '.') {
- parentino = p->inum;
+ pip.i_ino = p->inum;
break;
}
}
do_error(_("xfs_bmap_last_offset failed -- error - %d\n"),
error);
- /* re-init the directory to shortform */
- if ((error = libxfs_trans_iget(mp, tp, parentino, 0, 0, &pip))) {
- do_warn(
- _("couldn't iget parent inode %llu -- error - %d\n"),
- parentino, error);
- /* we'll try to use the orphanage ino then */
- parentino = orphanage_ino;
- if ((error = libxfs_trans_iget(mp, tp, parentino, 0, 0, &pip)))
- do_error(
- _("couldn't iget lost+found inode %llu -- error - %d\n"),
- parentino, error);
- }
-
/* free all data, leaf, node and freespace blocks */
if ((error = libxfs_bunmapi(tp, ip, 0, lastblock,
ASSERT(done);
- libxfs_dir2_init(tp, ip, pip);
+ libxfs_dir2_init(tp, ip, &pip);
error = libxfs_bmap_finish(&tp, &flist, firstblock, &committed);
xfs_inode_t *ip,
int *num_illegal,
int *need_dot,
- dir_stack_t *stack,
ino_tree_node_t *current_irec,
int current_ino_offset,
xfs_dabuf_t **bpp,
ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
inum = INT_GET(dep->inumber, ARCH_CONVERT);
lastfree = 0;
-
/*
* skip bogus entries (leading '/'). they'll be deleted
* later. must still log it, else we leak references to
libxfs_dir2_data_log_entry(tp, bp, dep);
continue;
}
+
bcopy(dep->name, fname, dep->namelen);
fname[dep->namelen] = '\0';
ASSERT(inum != NULLFSINO);
} else if (parent == ip->i_ino) {
add_inode_reached(irec, ino_offset);
add_inode_ref(current_irec, current_ino_offset);
- if (!do_prefetch && !is_inode_refchecked(inum, irec, ino_offset))
- push_dir(stack, inum);
} else {
junkit = 1;
do_warn(
/*
* Check contents of leaf-form block.
*/
-int
+static int
longform_dir2_check_leaf(
xfs_mount_t *mp,
xfs_inode_t *ip,
* Check contents of the node blocks (leaves)
* Looks for matching hash values for the data entries.
*/
-int
+static int
longform_dir2_check_node(
xfs_mount_t *mp,
xfs_inode_t *ip,
* destroy the entry and create a new one with recovered name/inode pairs.
* (ie. get libxfs to do all the grunt work)
*/
-void
+static void
longform_dir2_entry_check(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_inode_t *ip,
int *num_illegal,
int *need_dot,
- dir_stack_t *stack,
ino_tree_node_t *irec,
int ino_offset,
dir_hash_tab_t *hashtab)
libxfs_dir2_isblock(NULL, ip, &isblock);
libxfs_dir2_isleaf(NULL, ip, &isleaf);
- if (do_prefetch && !isblock)
- prefetch_p6_dir2(mp, ip);
-
/* check directory "data" blocks (ie. name/inode pairs) */
for (da_bno = 0, next_da_bno = 0;
next_da_bno != NULLFILEOFF && da_bno < mp->m_dirleafblk;
continue; /* try and read all "data" blocks */
}
longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
- stack, irec, ino_offset, &bplist[db], hashtab,
+ irec, ino_offset, &bplist[db], hashtab,
&freetab, da_bno, isblock);
}
fixit = (*num_illegal != 0) || dir2_is_badino(ino);
* shortform directory processing routines -- entry verification and
* bad entry deletion (pruning).
*/
-void
+static void
shortform_dir_entry_check(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_inode_t *ip,
int *ino_dirty,
- dir_stack_t *stack,
ino_tree_node_t *current_irec,
int current_ino_offset,
dir_hash_tab_t *hashtab)
} else if (parent == ino) {
add_inode_reached(irec, ino_offset);
add_inode_ref(current_irec, current_ino_offset);
-
- if (!do_prefetch && !is_inode_refchecked(lino, irec,
- ino_offset))
- push_dir(stack, lino);
} else {
junkit = 1;
do_warn(_("entry \"%s\" in dir %llu not "
}
/* ARGSUSED */
-void
+static void
prune_sf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip)
{
/* REFERENCED */
* shortform directory v2 processing routines -- entry verification and
* bad entry deletion (pruning).
*/
-void
+static void
shortform_dir2_entry_check(xfs_mount_t *mp,
xfs_ino_t ino,
xfs_inode_t *ip,
int *ino_dirty,
- dir_stack_t *stack,
ino_tree_node_t *current_irec,
int current_ino_offset,
dir_hash_tab_t *hashtab)
} else if (parent == ino) {
add_inode_reached(irec, ino_offset);
add_inode_ref(current_irec, current_ino_offset);
-
- if (!do_prefetch && !is_inode_refchecked(lino, irec,
- ino_offset))
- push_dir(stack, lino);
} else {
junkit = 1;
do_warn(_("entry \"%s\" in directory inode %llu"
}
/*
- * processes all directories reachable via the inodes on the stack
- * returns 0 if things are good, 1 if there's a problem
+ * process a single directory inode - check and fix all of its entries
*/
-void
-process_dirstack(xfs_mount_t *mp, dir_stack_t *stack)
+static void
+process_dir_inode(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ ino_tree_node_t *irec,
+ int ino_offset)
{
xfs_bmap_free_t flist;
xfs_fsblock_t first;
- xfs_ino_t ino;
xfs_inode_t *ip;
xfs_trans_t *tp;
xfs_dahash_t hashval;
- ino_tree_node_t *irec;
dir_hash_tab_t *hashtab;
- int ino_offset, need_dot, committed;
+ int need_dot, committed;
int dirty, num_illegal, error, nres;
/*
- * pull directory inode # off directory stack
- *
* open up directory inode, check all entries,
* then call prune_dir_entries to remove all
* remaining illegal directory entries.
*/
- while ((ino = pop_dir(stack)) != NULLFSINO) {
- irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino),
- XFS_INO_TO_AGINO(mp, ino));
- ASSERT(irec != NULL);
+ ASSERT(!is_inode_refchecked(ino, irec, ino_offset));
- ino_offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum;
-
- ASSERT(!is_inode_refchecked(ino, irec, ino_offset));
-
- if ((error = libxfs_iget(mp, NULL, ino, 0, &ip, 0))) {
- if (!no_modify)
- do_error(
- _("couldn't map inode %llu, err = %d\n"),
- ino, error);
- else {
- do_warn(
- _("couldn't map inode %llu, err = %d\n"),
- ino, error);
- /*
- * see below for what we're doing if this
- * is root. Why do we need to do this here?
- * to ensure that the root doesn't show up
- * as being disconnected in the no_modify case.
- */
- if (mp->m_sb.sb_rootino == ino) {
- add_inode_reached(irec, 0);
- add_inode_ref(irec, 0);
- }
- }
-
- add_inode_refchecked(ino, irec, 0);
- continue;
- }
-
- need_dot = dirty = num_illegal = 0;
-
- if (mp->m_sb.sb_rootino == ino) {
+ error = libxfs_iget(mp, NULL, ino, 0, &ip, 0);
+ if (error) {
+ if (!no_modify)
+ do_error(_("couldn't map inode %llu, err = %d\n"),
+ ino, error);
+ else {
+ do_warn(_("couldn't map inode %llu, err = %d\n"),
+ ino, error);
/*
- * mark root inode reached and bump up
- * link count for root inode to account
- * for '..' entry since the root inode is
- * never reached by a parent. we know
- * that root's '..' is always good --
- * guaranteed by phase 3 and/or below.
+ * see below for what we're doing if this
+ * is root. Why do we need to do this here?
+ * to ensure that the root doesn't show up
+ * as being disconnected in the no_modify case.
*/
- add_inode_reached(irec, ino_offset);
+ if (mp->m_sb.sb_rootino == ino) {
+ add_inode_reached(irec, 0);
+ add_inode_ref(irec, 0);
+ }
}
- add_inode_refchecked(ino, irec, ino_offset);
+ add_inode_refchecked(ino, irec, 0);
+ return;
+ }
- hashtab = dir_hash_init(ip->i_d.di_size);
+ need_dot = dirty = num_illegal = 0;
+ if (mp->m_sb.sb_rootino == ino) {
/*
- * look for bogus entries
+ * mark root inode reached and bump up
+ * link count for root inode to account
+ * for '..' entry since the root inode is
+ * never reached by a parent. we know
+ * that root's '..' is always good --
+ * guaranteed by phase 3 and/or below.
*/
- switch (ip->i_d.di_format) {
+ add_inode_reached(irec, ino_offset);
+ }
+
+ add_inode_refchecked(ino, irec, ino_offset);
+
+ hashtab = dir_hash_init(ip->i_d.di_size);
+
+ /*
+ * look for bogus entries
+ */
+ switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
/*
if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
longform_dir2_entry_check(mp, ino, ip,
&num_illegal, &need_dot,
- stack, irec,
- ino_offset,
+ irec, ino_offset,
hashtab);
else
longform_dir_entry_check(mp, ino, ip,
&num_illegal, &need_dot,
- stack, irec,
- ino_offset,
+ irec, ino_offset,
hashtab);
break;
+
case XFS_DINODE_FMT_LOCAL:
tp = libxfs_trans_alloc(mp, 0);
/*
if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
shortform_dir2_entry_check(mp, ino, ip, &dirty,
- stack, irec,
- ino_offset,
+ irec, ino_offset,
hashtab);
else
shortform_dir_entry_check(mp, ino, ip, &dirty,
- stack, irec,
- ino_offset,
+ irec, ino_offset,
hashtab);
ASSERT(dirty == 0 || (dirty && !no_modify));
if (dirty) {
libxfs_trans_log_inode(tp, ip,
XFS_ILOG_CORE | XFS_ILOG_DDATA);
- libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
- |XFS_TRANS_SYNC, 0);
+ libxfs_trans_commit(tp,
+ XFS_TRANS_RELEASE_LOG_RES |
+ XFS_TRANS_SYNC, 0);
} else {
- libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
+ libxfs_trans_cancel(tp,
+ XFS_TRANS_RELEASE_LOG_RES);
}
break;
+
default:
break;
- }
- dir_hash_done(hashtab);
+ }
+ dir_hash_done(hashtab);
- hashval = 0;
+ hashval = 0;
- /*
- * if we have to create a .. for /, do it now *before*
- * we delete the bogus entries, otherwise the directory
- * could transform into a shortform dir which would
- * probably cause the simulation to choke. Even
- * if the illegal entries get shifted around, it's ok
- * because the entries are structurally intact and in
- * in hash-value order so the simulation won't get confused
- * if it has to move them around.
- */
- if (!no_modify && need_root_dotdot &&
- ino == mp->m_sb.sb_rootino) {
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+ /*
+ * if we have to create a .. for /, do it now *before*
+ * we delete the bogus entries, otherwise the directory
+ * could transform into a shortform dir which would
+ * probably cause the simulation to choke. Even
+ * if the illegal entries get shifted around, it's ok
+ * because the entries are structurally intact and in
+ * hash-value order so the simulation won't get confused
+ * if it has to move them around.
+ */
+ if (!no_modify && need_root_dotdot && ino == mp->m_sb.sb_rootino) {
+ ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
- do_warn(_("recreating root directory .. entry\n"));
+ do_warn(_("recreating root directory .. entry\n"));
- tp = libxfs_trans_alloc(mp, 0);
- ASSERT(tp != NULL);
+ tp = libxfs_trans_alloc(mp, 0);
+ ASSERT(tp != NULL);
- nres = XFS_MKDIR_SPACE_RES(mp, 2);
- error = libxfs_trans_reserve(tp, nres,
- XFS_MKDIR_LOG_RES(mp),
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_MKDIR_LOG_COUNT);
+ nres = XFS_MKDIR_SPACE_RES(mp, 2);
+ error = libxfs_trans_reserve(tp, nres, XFS_MKDIR_LOG_RES(mp),
+ 0, XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
+ if (error)
+ res_failed(error);
- if (error)
- res_failed(error);
+ libxfs_trans_ijoin(tp, ip, 0);
+ libxfs_trans_ihold(tp, ip);
- libxfs_trans_ijoin(tp, ip, 0);
- libxfs_trans_ihold(tp, ip);
+ XFS_BMAP_INIT(&flist, &first);
- XFS_BMAP_INIT(&flist, &first);
+ error = dir_createname(mp, tp, ip, "..", 2, ip->i_ino, &first,
+ &flist, nres);
+ if (error)
+ do_error(_("can't make \"..\" entry in root inode "
+ "%llu, createname error %d\n"), ino, error);
- if ((error = dir_createname(mp, tp, ip, "..", 2,
- ip->i_ino, &first, &flist, nres)))
- do_error(
-_("can't make \"..\" entry in root inode %llu, createname error %d\n"),
- ino, error);
+ libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+ ASSERT(error == 0);
+ libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES |
+ XFS_TRANS_SYNC, 0);
- error = libxfs_bmap_finish(&tp, &flist, first,
- &committed);
- ASSERT(error == 0);
- libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
- |XFS_TRANS_SYNC, 0);
+ need_root_dotdot = 0;
+ } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino) {
+ do_warn(_("would recreate root directory .. entry\n"));
+ }
- need_root_dotdot = 0;
- } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino) {
- do_warn(_("would recreate root directory .. entry\n"));
+ /*
+ * delete any illegal entries -- which should only exist
+ * if the directory is a longform directory. bogus
+ * shortform directory entries were deleted in phase 4.
+ */
+ if (!no_modify && num_illegal > 0) {
+ ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
+ ASSERT(!XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
+
+ while (num_illegal > 0 && ip->i_d.di_format !=
+ XFS_DINODE_FMT_LOCAL) {
+ prune_lf_dir_entry(mp, ino, ip, &hashval);
+ num_illegal--;
}
/*
- * delete any illegal entries -- which should only exist
- * if the directory is a longform directory. bogus
- * shortform directory entries were deleted in phase 4.
- */
- if (!no_modify && num_illegal > 0) {
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL);
- ASSERT(!XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
-
- while (num_illegal > 0 && ip->i_d.di_format !=
- XFS_DINODE_FMT_LOCAL) {
- prune_lf_dir_entry(mp, ino, ip, &hashval);
- num_illegal--;
- }
-
+ * handle case where we've deleted so many
+ * entries that the directory has changed from
+ * a longform to a shortform directory. have
+ * to allocate a transaction since we're working
+ * with the incore data fork.
+ */
+ if (num_illegal > 0) {
+ ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+ tp = libxfs_trans_alloc(mp, 0);
/*
- * handle case where we've deleted so many
- * entries that the directory has changed from
- * a longform to a shortform directory. have
- * to allocate a transaction since we're working
- * with the incore data fork.
- */
- if (num_illegal > 0) {
- ASSERT(ip->i_d.di_format ==
- XFS_DINODE_FMT_LOCAL);
- tp = libxfs_trans_alloc(mp, 0);
- /*
- * using the remove reservation is overkill
- * since at most we'll only need to log the
- * inode but it's easier than wedging a
- * new define in ourselves. 10 block fs
- * space reservation is also overkill but
- * what the heck...
- */
- nres = XFS_REMOVE_SPACE_RES(mp);
- error = libxfs_trans_reserve(tp, nres,
- XFS_REMOVE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_REMOVE_LOG_COUNT);
- if (error)
- res_failed(error);
+ * using the remove reservation is overkill
+ * since at most we'll only need to log the
+ * inode but it's easier than wedging a
+ * new define in ourselves. 10 block fs
+ * space reservation is also overkill but
+ * what the heck...
+ */
+ nres = XFS_REMOVE_SPACE_RES(mp);
+ error = libxfs_trans_reserve(tp, nres,
+ XFS_REMOVE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_REMOVE_LOG_COUNT);
+ if (error)
+ res_failed(error);
- libxfs_trans_ijoin(tp, ip, 0);
- libxfs_trans_ihold(tp, ip);
+ libxfs_trans_ijoin(tp, ip, 0);
+ libxfs_trans_ihold(tp, ip);
- prune_sf_dir_entry(mp, ino, ip);
+ prune_sf_dir_entry(mp, ino, ip);
- libxfs_trans_log_inode(tp, ip,
- XFS_ILOG_CORE | XFS_ILOG_DDATA);
- ASSERT(error == 0);
- libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
- |XFS_TRANS_SYNC, 0);
- }
+ libxfs_trans_log_inode(tp, ip,
+ XFS_ILOG_CORE | XFS_ILOG_DDATA);
+ ASSERT(error == 0);
+ libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+ |XFS_TRANS_SYNC, 0);
}
+ }
+ /*
+ * if we need to create the '.' entry, do so only if
+ * the directory is a longform dir. if it's been
+ * turned into a shortform dir, then the inode is ok
+ * since shortform dirs have no '.' entry and the inode
+ * has already been committed by prune_lf_dir_entry().
+ */
+ if (need_dot) {
/*
- * if we need to create the '.' entry, do so only if
- * the directory is a longform dir. it it's been
- * turned into a shortform dir, then the inode is ok
- * since shortform dirs have no '.' entry and the inode
- * has already been committed by prune_lf_dir_entry().
+ * bump up our link count but don't
+ * bump up the inode link count. chances
+ * are good that even though we lost '.'
+ * the inode link counts reflect '.' so
+ * leave the inode link count alone and if
+ * it turns out to be wrong, we'll catch
+ * that in phase 7.
*/
- if (need_dot) {
+ add_inode_ref(irec, ino_offset);
+
+ if (no_modify) {
+ do_warn(_("would create missing \".\" entry in dir ino %llu\n"),
+ ino);
+ } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) {
/*
- * bump up our link count but don't
- * bump up the inode link count. chances
- * are good that even though we lost '.'
- * the inode link counts reflect '.' so
- * leave the inode link count alone and if
- * it turns out to be wrong, we'll catch
- * that in phase 7.
+ * need to create . entry in longform dir.
*/
- add_inode_ref(irec, ino_offset);
+ do_warn(_("creating missing \".\" entry in dir ino %llu\n"),
+ ino);
- if (no_modify) {
- do_warn(
- _("would create missing \".\" entry in dir ino %llu\n"),
- ino);
- } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) {
- /*
- * need to create . entry in longform dir.
- */
- do_warn(
- _("creating missing \".\" entry in dir ino %llu\n"),
- ino);
-
- tp = libxfs_trans_alloc(mp, 0);
- ASSERT(tp != NULL);
+ tp = libxfs_trans_alloc(mp, 0);
+ ASSERT(tp != NULL);
- nres = XFS_MKDIR_SPACE_RES(mp, 1);
- error = libxfs_trans_reserve(tp, nres,
- XFS_MKDIR_LOG_RES(mp),
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_MKDIR_LOG_COUNT);
+ nres = XFS_MKDIR_SPACE_RES(mp, 1);
+ error = libxfs_trans_reserve(tp, nres,
+ XFS_MKDIR_LOG_RES(mp),
+ 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_MKDIR_LOG_COUNT);
- if (error)
- res_failed(error);
+ if (error)
+ res_failed(error);
- libxfs_trans_ijoin(tp, ip, 0);
- libxfs_trans_ihold(tp, ip);
+ libxfs_trans_ijoin(tp, ip, 0);
+ libxfs_trans_ihold(tp, ip);
- XFS_BMAP_INIT(&flist, &first);
+ XFS_BMAP_INIT(&flist, &first);
- if ((error = dir_createname(mp, tp, ip, ".",
- 1, ip->i_ino, &first, &flist,
- nres)))
- do_error(
- _("can't make \".\" entry in dir ino %llu, createname error %d\n"),
- ino, error);
+ if ((error = dir_createname(mp, tp, ip, ".",
+ 1, ip->i_ino, &first, &flist,
+ nres)))
+ do_error(_("can't make \".\" entry in dir ino "
+ "%llu, createname error %d\n"),
+ ino, error);
- libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = libxfs_bmap_finish(&tp, &flist, first,
- &committed);
- ASSERT(error == 0);
- libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
- |XFS_TRANS_SYNC, 0);
- }
+ error = libxfs_bmap_finish(&tp, &flist, first,
+ &committed);
+ ASSERT(error == 0);
+ libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES
+ |XFS_TRANS_SYNC, 0);
}
-
- libxfs_iput(ip, 0);
}
+
+ libxfs_iput(ip, 0);
}
/*
static void
check_for_orphaned_inodes(
xfs_mount_t *mp,
+ xfs_agnumber_t agno,
ino_tree_node_t *irec)
{
int i;
- int err;
xfs_ino_t ino;
for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
if (is_inode_free(irec, i))
continue;
- if (!is_inode_reached(irec, i)) {
- ASSERT(inode_isadir(irec, i) ||
- num_inode_references(irec, i) == 0);
- ino = XFS_AGINO_TO_INO(mp, i, i + irec->ino_startnum);
- if (inode_isadir(irec, i))
- do_warn(_("disconnected dir inode %llu, "), ino);
- else
- do_warn(_("disconnected inode %llu, "), ino);
- if (!no_modify) {
- if (!orphanage_ino)
- orphanage_ino = mk_orphanage(mp);
- if (!orphanage_ip) {
- err = libxfs_iget(mp, NULL, orphanage_ino, 0, &orphanage_ip, 0);
- if (err)
- do_error(_("%d - couldn't iget orphanage inode\n"), err);
- }
- do_warn(_("moving to %s\n"), ORPHANAGE);
- mv_orphanage(mp, ino, inode_isadir(irec, i));
- } else {
- do_warn(_("would move to %s\n"), ORPHANAGE);
- }
- /*
- * for read-only case, even though the inode isn't
- * really reachable, set the flag (and bump our link
- * count) anyway to fool phase 7
- */
- add_inode_reached(irec, i);
+ if (is_inode_reached(irec, i))
+ continue;
+
+ ASSERT(inode_isadir(irec, i) ||
+ num_inode_references(irec, i) == 0);
+
+ ino = XFS_AGINO_TO_INO(mp, agno, i + irec->ino_startnum);
+ if (inode_isadir(irec, i))
+ do_warn(_("disconnected dir inode %llu, "), ino);
+ else
+ do_warn(_("disconnected inode %llu, "), ino);
+ if (!no_modify) {
+ if (!orphanage_ino)
+ orphanage_ino = mk_orphanage(mp);
+ do_warn(_("moving to %s\n"), ORPHANAGE);
+ mv_orphanage(mp, ino, inode_isadir(irec, i));
+ } else {
+ do_warn(_("would move to %s\n"), ORPHANAGE);
}
+ /*
+ * for read-only case, even though the inode isn't
+ * really reachable, set the flag (and bump our link
+ * count) anyway to fool phase 7
+ */
+ add_inode_reached(irec, i);
}
}
static void
-traverse_function(xfs_mount_t *mp, xfs_agnumber_t agno)
+traverse_function(
+ work_queue_t *wq,
+ xfs_agnumber_t agno,
+ void *arg)
{
- register ino_tree_node_t *irec;
- int j;
- xfs_ino_t ino;
- dir_stack_t stack;
+ ino_tree_node_t *irec;
+ int i;
+ prefetch_args_t *pf_args = arg;
+
+ wait_for_inode_prefetch(pf_args);
if (verbose)
do_log(_(" - agno = %d\n"), agno);
- dir_stack_init(&stack);
- irec = findfirst_inode_rec(agno);
-
- while (irec != NULL) {
- for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
- if (!inode_isadir(irec, j)) {
- ino = XFS_AGINO_TO_INO(mp, agno,
- irec->ino_startnum + j);
- if (mp->m_sb.sb_rootino != ino)
- continue;
- }
+ for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
+ if (irec->ino_isa_dir == 0)
+ continue;
- ino = XFS_AGINO_TO_INO(mp, agno,
- irec->ino_startnum + j);
+ if (pf_args)
+ sem_post(&pf_args->ra_count);
- push_dir(&stack, ino);
- process_dirstack(mp, &stack);
+ for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+ if (inode_isadir(irec, i))
+ process_dir_inode(wq->mp,
+ XFS_AGINO_TO_INO(wq->mp, agno,
+ irec->ino_startnum + i), irec, i);
}
- irec = next_ino_rec(irec);
}
- return;
+ cleanup_inode_prefetch(pf_args);
}
static void
-traverse_alt(xfs_mount_t *mp)
+traverse_ags(
+ xfs_mount_t *mp)
{
int i;
-
- set_progress_msg(PROG_FMT_TRAVERSAL, (__uint64_t) glob_agcount);
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- traverse_function(mp, i);
- PROG_RPT_INC(prog_rpt_done[i], 1);
+ work_queue_t *queues;
+ prefetch_args_t *pf_args[2];
+
+ queues = malloc(thread_count * sizeof(work_queue_t));
+ queues[0].mp = mp;
+
+ if (!libxfs_bcache_overflowed()) {
+ /*create_work_queue(&queues[0], mp, libxfs_nproc());
+ for (i = 0; i < glob_agcount; i++)
+ queue_work(&queues[0], traverse_function, i, NULL);
+ destroy_work_queue(&queues[0]);*/
+ for (i = 0; i < glob_agcount; i++)
+ traverse_function(&queues[0], i, NULL);
+ } else {
+ /* TODO: AG stride support */
+ pf_args[0] = start_inode_prefetch(0, 1, NULL);
+ for (i = 0; i < glob_agcount; i++) {
+ pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1,
+ pf_args[i & 1]);
+ traverse_function(&queues[0], i, pf_args[i & 1]);
+ }
}
- print_final_rpt();
+ free(queues);
}
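/*
 * Illustration (not part of the patch): traverse_ags() pipelines prefetch
 * and processing with two slots -- while AG i is processed out of slot
 * (i & 1), slot ((~i) & 1) is refilled with AG i+1's prefetch, so the I/O
 * for the next AG overlaps the CPU work on the current one. A minimal,
 * self-contained sketch with hypothetical prefetch_ag()/process_ag() stubs:
 */
#include <stdio.h>

static int prefetch_ag(int agno, int nags)
{
	if (agno >= nags)
		return -1;		/* no AG past the last one */
	printf("prefetch AG %d\n", agno);
	return agno;
}

static void process_ag(int agno)
{
	printf("process  AG %d\n", agno);
}

int main(void)
{
	int nags = 4, pf[2], i;

	pf[0] = prefetch_ag(0, nags);
	for (i = 0; i < nags; i++) {
		pf[(~i) & 1] = prefetch_ag(i + 1, nags);	/* refill idle slot */
		if (pf[i & 1] == i)				/* consume this AG's slot */
			process_ag(i);
	}
	return 0;
}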
void
phase6(xfs_mount_t *mp)
{
- xfs_ino_t ino;
ino_tree_node_t *irec;
- dir_stack_t stack;
int i;
- int j;
- xfs_ino_t orphanage_ino;
bzero(&zerocr, sizeof(struct cred));
bzero(&zerofsx, sizeof(struct fsxattr));
}
}
- dir_stack_init(&stack);
-
mark_standalone_inodes(mp);
- /*
- * push root dir on stack, then go
- */
- if (!need_root_inode) {
- do_log(_(" - traversing filesystem starting at / ... \n"));
-
- if (do_prefetch) {
- traverse_alt(mp);
- } else {
- push_dir(&stack, mp->m_sb.sb_rootino);
- process_dirstack(mp, &stack);
- }
-
- do_log(_(" - traversal finished ... \n"));
- } else {
- ASSERT(no_modify != 0);
-
- do_log(
-_(" - root inode lost, cannot make new one in no modify mode ... \n"));
- do_log(
-_(" - skipping filesystem traversal from / ... \n"));
- }
-
- do_log(_(" - traversing all unattached subtrees ... \n"));
+ do_log(_(" - traversing filesystem ... \n"));
irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino),
XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino));
}
/*
- * then process all unreached inodes
- * by walking incore inode tree
- *
- * get next unreached directory inode # from
- * incore list
- * push inode on dir stack
- * call process_dirstack
+ * then process all inodes by walking incore inode tree
*/
- for (i = 0; i < glob_agcount; i++) {
- irec = findfirst_inode_rec(i);
-
- if (irec == NULL)
- continue;
-
- while (irec != NULL) {
- for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
- if (!is_inode_confirmed(irec, j))
- continue;
- /*
- * skip directories that have already been
- * processed, even if they haven't been
- * reached. If they are reachable, we'll
- * pick them up when we process their parent.
- */
- ino = XFS_AGINO_TO_INO(mp, i,
- j + irec->ino_startnum);
- if (inode_isadir(irec, j) &&
- !is_inode_refchecked(ino,
- irec, j)) {
- push_dir(&stack, ino);
- process_dirstack(mp, &stack);
- }
- }
- irec = next_ino_rec(irec);
- }
- }
+ traverse_ags(mp);
do_log(_(" - traversals finished ... \n"));
do_log(_(" - moving disconnected inodes to %s ... \n"),
for (i = 0; i < glob_agcount; i++) {
irec = findfirst_inode_rec(i);
while (irec != NULL) {
- check_for_orphaned_inodes(mp, irec);
+ check_for_orphaned_inodes(mp, i, irec);
irec = next_ino_rec(irec);
}
}
#include "err_protos.h"
#include "dinode.h"
#include "versions.h"
-#include "prefetch.h"
#include "progress.h"
-#include "threads.h"
/* dinoc is a pointer to the IN-CORE dinode core */
static void
}
}
-static void
-phase7_alt_function(xfs_mount_t *mp, xfs_agnumber_t agno)
-{
- ino_tree_node_t *irec;
- int j;
- __uint32_t nrefs;
-
- /*
- * using the nlink values memorised during phase3/4, compare to the
- * nlink counted in phase 6, and if different, update on-disk.
- */
-
- irec = findfirst_inode_rec(agno);
-
- while (irec != NULL) {
- for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
- assert(is_inode_confirmed(irec, j));
-
- if (is_inode_free(irec, j))
- continue;
-
- assert(no_modify || is_inode_reached(irec, j));
- assert(no_modify || is_inode_referenced(irec, j));
-
- nrefs = num_inode_references(irec, j);
-
- if (get_inode_disk_nlinks(irec, j) != nrefs)
- update_inode_nlinks(mp, XFS_AGINO_TO_INO(mp,
- agno, irec->ino_startnum + j),
- nrefs);
- }
- irec = next_ino_rec(irec);
- PROG_RPT_INC(prog_rpt_done[agno], XFS_INODES_PER_CHUNK);
- }
-}
-
-static void
-phase7_alt(xfs_mount_t *mp)
-{
- int i;
-
- set_progress_msg(no_modify ? PROGRESS_FMT_VRFY_LINK : PROGRESS_FMT_CORR_LINK,
- (__uint64_t) mp->m_sb.sb_icount);
-
- for (i = 0; i < glob_agcount; i++) {
- queue_work(phase7_alt_function, mp, i);
- }
- wait_for_workers();
- print_final_rpt();
-}
-
void
phase7(xfs_mount_t *mp)
{
else
do_log(_("Phase 7 - verify link counts...\n"));
- if (do_prefetch) {
- phase7_alt(mp);
- return;
- }
-
/*
* for each ag, look at each inode 1 at a time. If the number of
* links is bad, reset it, log the inode core, commit the transaction
#include <libxfs.h>
-#include "prefetch.h"
-#include "aio.h"
+#include <pthread.h>
#include "avl.h"
#include "globals.h"
#include "agheader.h"
#include "dinode.h"
#include "bmap.h"
#include "versions.h"
+#include "threads.h"
+#include "prefetch.h"
+#include "progress.h"
+#include "radix-tree.h"
int do_prefetch = 1;
-ino_tree_node_t *
-prefetch_inode_chunks(xfs_mount_t *mp,
- xfs_agnumber_t agno,
- ino_tree_node_t *ino_ra)
-{
- xfs_agblock_t agbno;
- libxfs_lio_req_t *liop;
- int i;
+/*
+ * Performs prefetching by priming the libxfs cache with a dedicated thread
+ * that scans inodes and reads blocks ahead of the time they are required.
+ *
+ * Any I/O errors can be safely ignored.
+ */
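/*
 * A note on the mechanism (illustrative, not the patch's own code): the
 * queuing thread and I/O workers below coordinate through the classic
 * pthread condition-variable handshake -- set a flag under a mutex, signal,
 * and have waiters re-check the flag in a loop. A minimal generic sketch:
 */
#include <pthread.h>
#include <stdio.h>

struct handshake {
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
	int		ready;			/* protected by lock */
};

static void signal_ready(struct handshake *h)
{
	pthread_mutex_lock(&h->lock);
	if (!h->ready) {
		h->ready = 1;
		pthread_cond_broadcast(&h->cond);
	}
	pthread_mutex_unlock(&h->lock);
}

static void *worker(void *arg)
{
	struct handshake *h = arg;

	pthread_mutex_lock(&h->lock);
	while (!h->ready)			/* wakeups can be spurious */
		pthread_cond_wait(&h->cond, &h->lock);
	pthread_mutex_unlock(&h->lock);
	printf("worker released\n");
	return NULL;
}

int main(void)
{
	struct handshake h = { PTHREAD_MUTEX_INITIALIZER,
			       PTHREAD_COND_INITIALIZER, 0 };
	pthread_t t;

	pthread_create(&t, NULL, worker, &h);
	signal_ready(&h);
	pthread_join(t, NULL);
	return 0;
}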
- if (libxfs_lio_ino_count == 0)
- return NULL;
+static xfs_mount_t *mp;
+static int mp_fd;
+static int pf_max_bytes;
+static int pf_max_bbs;
+static int pf_max_fsbs;
+static int pf_batch_bytes;
+static int pf_batch_fsbs;
- liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_INO);
- if (liop == NULL) {
- do_prefetch = 0;
- return NULL;
- }
+#define B_INODE 0x1000000
+#define B_META 0x2000000
- if (ino_ra == NULL)
- ino_ra = findfirst_inode_rec(agno);
-
- i = 0;
- while (ino_ra) {
- agbno = XFS_AGINO_TO_AGBNO(mp, ino_ra->ino_startnum);
- liop[i].blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
- liop[i].len = (int) XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp));
- i++;
- ino_ra = next_ino_rec(ino_ra);
- if (i >= libxfs_lio_ino_count)
- break;
+#define DEF_BATCH_BYTES 0x10000
+
+#define MAX_BUFS 128
+
+#define IO_THRESHOLD (MAX_BUFS * PF_THREAD_COUNT)
+
+typedef enum pf_which {
+ PF_PRIMARY,
+ PF_SECONDARY,
+ PF_META_ONLY
+} pf_which_t;
+
+
+static inline void
+pf_start_processing(
+ prefetch_args_t *args)
+{
+ if (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
+ pftrace("signalling processing for AG %d", args->agno);
+#endif
+ args->can_start_processing = 1;
+ pthread_cond_signal(&args->start_processing);
}
- if (i) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_INO) == -1)
- do_prefetch = 0;
+}
+
+static inline void
+pf_start_io_workers(
+ prefetch_args_t *args)
+{
+ if (!args->can_start_reading) {
+#ifdef XR_PF_TRACE
+ pftrace("signalling reading for AG %d", args->agno);
+#endif
+ args->can_start_reading = 1;
+ pthread_cond_broadcast(&args->start_reading);
}
- libxfs_put_lio_buffer((void *) liop);
- return (ino_ra);
}
+
static void
-prefetch_node(
- xfs_mount_t *mp,
- xfs_buf_t *bp,
- da_bt_cursor_t *da_cursor)
+pf_queue_io(
+ prefetch_args_t *args,
+ xfs_fsblock_t fsbno,
+ int blen,
+ int flag)
{
- xfs_da_intnode_t *node;
- libxfs_lio_req_t *liop;
- int i;
- xfs_dfsbno_t fsbno;
-
- node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
- if (INT_GET(node->hdr.count, ARCH_CONVERT) <= 1)
- return;
+ xfs_buf_t *bp;
- if ((liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_DIR)) == NULL) {
+ bp = libxfs_getbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+ XFS_FSB_TO_BB(mp, blen));
+ if (bp->b_flags & LIBXFS_B_UPTODATE) {
+ libxfs_putbuf(bp);
return;
}
+ bp->b_flags |= flag;
- for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); i++) {
- if (i == libxfs_lio_dir_count)
- break;
+ pthread_mutex_lock(&args->lock);
- fsbno = blkmap_get(da_cursor->blkmap, INT_GET(node->btree[i].before, ARCH_CONVERT));
- if (fsbno == NULLDFSBNO) {
- libxfs_put_lio_buffer((void *) liop);
- return;
+ if (fsbno > args->last_bno_read) {
+ radix_tree_insert(&args->primary_io_queue, fsbno, bp);
+ if (flag == B_META)
+ radix_tree_tag_set(&args->primary_io_queue, fsbno, 0);
+ else {
+ args->inode_bufs_queued++;
+ if (args->inode_bufs_queued == IO_THRESHOLD)
+ pf_start_io_workers(args);
}
-
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, fsbno);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
+#ifdef XR_PF_TRACE
+ pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+ "primary queue (inode_bufs_queued = %d, last_bno = %lu)",
+ flag == B_INODE ? 'I' : 'M', bp,
+ (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+ args->inode_bufs_queued, args->last_bno_read);
+#endif
+ } else {
+#ifdef XR_PF_TRACE
+ pftrace("getbuf %c %p (%llu) in AG %d (fsbno = %lu) added to "
+ "secondary queue (last_bno = %lu)",
+ flag == B_INODE ? 'I' : 'M', bp,
+ (long long)XFS_BUF_ADDR(bp), args->agno, fsbno,
+ args->last_bno_read);
+#endif
+ ASSERT(flag == B_META);
+ radix_tree_insert(&args->secondary_io_queue, fsbno, bp);
}
- if (i > 1) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_DIR) == -1)
- do_prefetch = 0;
- }
+ pf_start_processing(args);
- libxfs_put_lio_buffer((void *) liop);
- return;
+ pthread_mutex_unlock(&args->lock);
}
-void
-prefetch_dir1(
- xfs_mount_t *mp,
- xfs_dablk_t bno,
- da_bt_cursor_t *da_cursor)
+static int
+pf_read_bmbt_reclist(
+ prefetch_args_t *args,
+ xfs_bmbt_rec_t *rp,
+ int numrecs)
{
- xfs_da_intnode_t *node;
- xfs_buf_t *bp;
- xfs_dfsbno_t fsbno;
int i;
+ xfs_dfsbno_t s; /* start */
+ xfs_dfilblks_t c; /* count */
+ xfs_dfiloff_t o; /* offset */
+ xfs_dfilblks_t cp = 0; /* prev count */
+ xfs_dfiloff_t op = 0; /* prev offset */
+ int flag; /* extent flag */
+
+ for (i = 0; i < numrecs; i++, rp++) {
+ convert_extent((xfs_bmbt_rec_32_t*)rp, &o, &s, &c, &flag);
+
+ if (((i > 0) && (op + cp > o)) || (c == 0) ||
+ (o >= fs_max_file_offset))
+ return 0;
+
+ if (!verify_dfsbno(mp, s) || !verify_dfsbno(mp, s + c - 1))
+ return 0;
+
+ if (!args->dirs_only && ((o + c) >= mp->m_dirfreeblk))
+ break; /* only Phase 6 reads the free blocks */
+
+ op = o;
+ cp = c;
+
+ while (c) {
+#ifdef XR_PF_TRACE
+ pftrace("queuing dir extent in AG %d", args->agno);
+#endif
+ pf_queue_io(args, s, 1, B_META);
+ c--;
+ s++;
+ }
+ }
+ return 1;
+}
- fsbno = blkmap_get(da_cursor->blkmap, bno);
- if (fsbno == NULLDFSBNO)
- return;
+/*
+ * simplified version of the main scan_lbtree. Returns 0 to stop.
+ */
+
+static int
+pf_scan_lbtree(
+ xfs_dfsbno_t dbno,
+ int level,
+ int isadir,
+ prefetch_args_t *args,
+ int (*func)(xfs_btree_lblock_t *block,
+ int level,
+ int isadir,
+ prefetch_args_t *args))
+{
+ xfs_buf_t *bp;
+ int rc;
- bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+ bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dbno),
XFS_FSB_TO_BB(mp, 1), 0);
+ if (!bp)
+ return 0;
- if (bp == NULL)
- return;
+ rc = (*func)((xfs_btree_lblock_t *)XFS_BUF_PTR(bp), level - 1, isadir, args);
+ libxfs_putbuf(bp);
- node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
- if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) {
- libxfs_putbuf(bp);
- return;
+ return rc;
+}
+
+static int
+pf_scanfunc_bmap(
+ xfs_btree_lblock_t *block,
+ int level,
+ int isadir,
+ prefetch_args_t *args)
+{
+ xfs_bmbt_rec_t *rp;
+ xfs_bmbt_ptr_t *pp;
+ int numrecs;
+ int i;
+ xfs_dfsbno_t dbno;
+
+ /*
+ * do some validation on the block contents
+ */
+ if ((be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC) ||
+ (be16_to_cpu(block->bb_level) != level))
+ return 0;
+
+ numrecs = be16_to_cpu(block->bb_numrecs);
+
+ if (level == 0) {
+ if (numrecs > mp->m_bmap_dmxr[0] || !isadir)
+ return 0;
+
+ rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+ block, 1, mp->m_bmap_dmxr[0]);
+
+ return pf_read_bmbt_reclist(args, rp, numrecs);
}
- prefetch_node(mp, bp, da_cursor);
+ if (numrecs > mp->m_bmap_dmxr[1])
+ return 0;
- /* skip prefetching if next level is leaf level */
- if (INT_GET(node->hdr.level, ARCH_CONVERT) > 1) {
- for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); i++) {
- prefetch_dir1(mp,
- INT_GET(node->btree[i].before, ARCH_CONVERT),
- da_cursor);
- }
+ pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+ mp->m_bmap_dmxr[1]);
+
+ for (i = 0; i < numrecs; i++) {
+ dbno = be64_to_cpu(pp[i]);
+ if (!verify_dfsbno(mp, dbno))
+ return 0;
+ if (!pf_scan_lbtree(dbno, level, isadir, args, pf_scanfunc_bmap))
+ return 0;
}
-
- libxfs_putbuf(bp);
- return;
+ return 1;
}
-void
-prefetch_dir2(
- xfs_mount_t *mp,
- blkmap_t *blkmap)
+
+static void
+pf_read_btinode(
+ prefetch_args_t *args,
+ xfs_dinode_t *dino,
+ int isadir)
{
- xfs_dfiloff_t dbno;
- xfs_dfiloff_t pdbno;
- bmap_ext_t *bmp;
- int nex;
- int i, j, t;
- libxfs_lio_req_t *liop;
-
- liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_DIR);
- if (liop == NULL)
+ xfs_bmdr_block_t *dib;
+ xfs_bmbt_ptr_t *pp;
+ int i;
+ int level;
+ int numrecs;
+ int dsize;
+ xfs_dfsbno_t dbno;
+
+ dib = (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dino);
+
+ level = be16_to_cpu(dib->bb_level);
+ numrecs = be16_to_cpu(dib->bb_numrecs);
+
+ if ((numrecs == 0) || (level == 0) ||
+ (level > XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))
return;
+ /*
+ * use bmdr/dfork_dsize since the root block is in the data fork
+ */
+ if (XFS_BMDR_SPACE_CALC(numrecs) > XFS_DFORK_DSIZE(dino, mp))
+ return;
+
+ dsize = XFS_DFORK_DSIZE(dino, mp);
+ pp = XFS_BTREE_PTR_ADDR(dsize, xfs_bmdr, dib, 1,
+ XFS_BTREE_BLOCK_MAXRECS(dsize, xfs_bmdr, 0));
- pdbno = NULLDFILOFF; /* previous dbno is NULLDFILOFF */
- i = 0;
- while ((dbno = blkmap_next_off(blkmap, pdbno, &t)) < mp->m_dirfreeblk) {
- if (i == libxfs_lio_dir_count)
+ for (i = 0; i < numrecs; i++) {
+ dbno = be64_to_cpu(pp[i]);
+ if (!verify_dfsbno(mp, dbno))
break;
- if (dbno == NULLDFILOFF)
+ if (!pf_scan_lbtree(dbno, level, isadir, args, pf_scanfunc_bmap))
break;
- if (mp->m_dirblkfsbs == 1) {
- xfs_dfsbno_t blk;
+ }
+}
+
+static void
+pf_read_exinode(
+ prefetch_args_t *args,
+ xfs_dinode_t *dino)
+{
+ pf_read_bmbt_reclist(args, (xfs_bmbt_rec_t *)XFS_DFORK_DPTR(dino),
+ be32_to_cpu(dino->di_core.di_nextents));
+}
- /* avoid bmp realloc/free overhead, use blkmap_get */
- blk = blkmap_get(blkmap, dbno);
- if (blk == NULLDFSBNO)
+static void
+pf_read_inode_dirs(
+ prefetch_args_t *args,
+ xfs_buf_t *bp)
+{
+ xfs_dinode_t *dino;
+ int icnt = 0;
+ xfs_dinode_core_t *dinoc;
+
+ for (icnt = 0; icnt < (XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog); icnt++) {
+ dino = XFS_MAKE_IPTR(mp, bp, icnt);
+ dinoc = &dino->di_core;
+
+ /*
+ * We only prefetch directory contents (for extent-format
+ * directories) and btree nodes (for btree-format inodes)
+ */
+ if (dinoc->di_format <= XFS_DINODE_FMT_LOCAL ||
+ (dinoc->di_format == XFS_DINODE_FMT_EXTENTS &&
+ (be16_to_cpu(dinoc->di_mode) & S_IFMT) != S_IFDIR))
+ continue;
+
+ /*
+ * do some checks on the inode to see if we can prefetch
+ * its directory data. It's a cut down version of
+ * process_dinode_int() in dinode.c.
+ */
+ if (dinoc->di_format > XFS_DINODE_FMT_BTREE)
+ continue;
+
+ if (be16_to_cpu(dinoc->di_magic) != XFS_DINODE_MAGIC)
+ continue;
+
+ if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+ (!fs_inode_nlink && dinoc->di_version >
+ XFS_DINODE_VERSION_1))
+ continue;
+
+ if (be64_to_cpu(dinoc->di_size) <= XFS_DFORK_DSIZE(dino, mp))
+ continue;
+
+ if ((dinoc->di_forkoff != 0) &&
+ (dinoc->di_forkoff >= (XFS_LITINO(mp) >> 3)))
+ continue;
+
+ switch (dinoc->di_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+ pf_read_exinode(args, dino);
break;
- pdbno = dbno;
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, blk);
- liop[i].len = (int) XFS_FSB_TO_BB(mp, 1);
- i++;
- }
- else if (mp->m_dirblkfsbs > 1) {
- nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp, NULL);
- if (nex == 0)
+ case XFS_DINODE_FMT_BTREE:
+ pf_read_btinode(args, dino, (be16_to_cpu(
+ dinoc->di_mode) & S_IFMT) == S_IFDIR);
break;
- pdbno = dbno + mp->m_dirblkfsbs - 1;
- for (j = 0; j < nex; j++) {
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, bmp[j].startblock);
- liop[i].len = (int) XFS_FSB_TO_BB(mp, bmp[j].blockcount);
- i++;
- if (i == libxfs_lio_dir_count)
- break; /* for loop */
- }
- free(bmp);
- }
- else {
- do_error("invalid mp->m_dirblkfsbs %d\n", mp->m_dirblkfsbs);
}
}
- if (i > 1) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_DIR) == -1)
- do_prefetch = 0;
- }
- libxfs_put_lio_buffer((void *) liop);
}
+/*
+ * pf_batch_read must be called with args->lock held.
+ */
+
static void
-prefetch_p6_node(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- xfs_buf_t *bp)
+pf_batch_read(
+ prefetch_args_t *args,
+ pf_which_t which,
+ void *buf)
{
- xfs_da_intnode_t *node;
- libxfs_lio_req_t *liop;
+ struct radix_tree_root *queue;
+ xfs_buf_t *bplist[MAX_BUFS];
+ unsigned int num;
+ off64_t first_off, last_off, next_off;
+ int len, size;
int i;
- xfs_fsblock_t fblock;
- xfs_dfsbno_t fsbno;
- xfs_bmbt_irec_t map;
- int nmap;
- int error;
-
- node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
- if (INT_GET(node->hdr.count, ARCH_CONVERT) <= 1)
- return;
-
- if ((liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_DIR)) == NULL) {
- return;
- }
-
- fblock = NULLFSBLOCK;
-
- for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); i++) {
- if (i == libxfs_lio_dir_count)
- break;
+ int inode_bufs;
+ unsigned long fsbno;
+ char *pbuf;
+
+ queue = (which != PF_SECONDARY) ? &args->primary_io_queue
+ : &args->secondary_io_queue;
+
+ while (radix_tree_lookup_first(queue, &fsbno) != NULL) {
+
+ if (which != PF_META_ONLY) {
+ num = radix_tree_gang_lookup_ex(queue,
+ (void**)&bplist[0], fsbno,
+ fsbno + pf_max_fsbs, MAX_BUFS);
+ ASSERT(num > 0);
+ ASSERT(XFS_FSB_TO_DADDR(mp, fsbno) ==
+ XFS_BUF_ADDR(bplist[0]));
+ } else {
+ num = radix_tree_gang_lookup_tag(queue,
+ (void**)&bplist[0], fsbno,
+ MAX_BUFS / 4, 0);
+ if (num == 0)
+ return;
+ }
- nmap = 1;
- error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t)
- INT_GET(node->btree[i].before, ARCH_CONVERT), 1,
- XFS_BMAPI_METADATA, &fblock, 0,
- &map, &nmap, NULL);
+ /*
+ * do a big read if 25% of the potential buffer is useful;
+ * otherwise find as many close-together blocks as possible
+ * and read them in one read
+ */
+ first_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[0]));
+ last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
+ XFS_BUF_SIZE(bplist[num-1]);
+ while (last_off - first_off > pf_max_bytes) {
+ num--;
+ last_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[num-1])) +
+ XFS_BUF_SIZE(bplist[num-1]);
+ }
+ if (num < ((last_off - first_off) >> (mp->m_sb.sb_blocklog + 3))) {
+ /*
+ * not enough blocks for one big read, so determine
+ * the number of blocks that are close enough.
+ */
+ last_off = first_off + XFS_BUF_SIZE(bplist[0]);
+ for (i = 1; i < num; i++) {
+ next_off = LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) +
+ XFS_BUF_SIZE(bplist[i]);
+ if (next_off - last_off > pf_batch_bytes)
+ break;
+ last_off = next_off;
+ }
+ num = i;
+ }
- if (error || (nmap != 1)) {
- libxfs_put_lio_buffer((void *) liop);
- return;
+ for (i = 0; i < num; i++) {
+ if (radix_tree_delete(queue, XFS_DADDR_TO_FSB(mp,
+ XFS_BUF_ADDR(bplist[i]))) == NULL)
+ do_error(_("prefetch corruption\n"));
}
- if ((fsbno = map.br_startblock) == HOLESTARTBLOCK) {
- libxfs_put_lio_buffer((void *) liop);
- return;
+ if (which == PF_PRIMARY) {
+ for (inode_bufs = 0, i = 0; i < num; i++) {
+ if (bplist[i]->b_flags & B_INODE)
+ inode_bufs++;
+ }
+ args->inode_bufs_queued -= inode_bufs;
+ if (inode_bufs && (first_off >> mp->m_sb.sb_blocklog) >
+ pf_batch_fsbs)
+ args->last_bno_read = (first_off >> mp->m_sb.sb_blocklog);
+ }
+#ifdef XR_PF_TRACE
+ pftrace("reading bbs %llu to %llu (%d bufs) from %s queue in AG %d (last_bno = %lu, inode_bufs = %d)",
+ (long long)XFS_BUF_ADDR(bplist[0]),
+ (long long)XFS_BUF_ADDR(bplist[num-1]), num,
+ (which != PF_SECONDARY) ? "pri" : "sec", args->agno,
+ args->last_bno_read, args->inode_bufs_queued);
+#endif
+ pthread_mutex_unlock(&args->lock);
+
+ /*
+ * now read the data and put it into the xfs_buf_t's
+ */
+ len = pread64(mp_fd, buf, (int)(last_off - first_off), first_off);
+ if (len > 0) {
+ /*
+ * go through the xfs_buf_t list copying from the
+ * read buffer into the xfs_buf_t's and release them.
+ */
+ last_off = first_off;
+ for (i = 0; i < num; i++) {
+
+ pbuf = ((char *)buf) + (LIBXFS_BBTOOFF64(XFS_BUF_ADDR(bplist[i])) - first_off);
+ size = XFS_BUF_SIZE(bplist[i]);
+ if (len < size)
+ break;
+ memcpy(XFS_BUF_PTR(bplist[i]), pbuf, size);
+ bplist[i]->b_flags |= LIBXFS_B_UPTODATE;
+ len -= size;
+ if (bplist[i]->b_flags & B_INODE)
+ pf_read_inode_dirs(args, bplist[i]);
+ }
+ }
+ for (i = 0; i < num; i++) {
+#ifdef XR_PF_TRACE
+ pftrace("putbuf %c %p (%llu) in AG %d",
+ bplist[i]->b_flags & B_INODE ? 'I' : 'M',
+ bplist[i], (long long)XFS_BUF_ADDR(bplist[i]),
+ args->agno);
+#endif
+ libxfs_putbuf(bplist[i]);
+ }
+ pthread_mutex_lock(&args->lock);
+ if (which != PF_SECONDARY) {
+#ifdef XR_PF_TRACE
+ pftrace("inode_bufs_queued for AG %d = %d", args->agno,
+ args->inode_bufs_queued);
+#endif
+ /*
+ * if the primary inode queue is running low, process metadata
+ * in both queues to avoid I/O starvation, as the processing
+ * thread would otherwise be left waiting for a metadata
+ * buffer
+ */
+ if (which == PF_PRIMARY && !args->queuing_done &&
+ args->inode_bufs_queued < IO_THRESHOLD) {
+#ifdef XR_PF_TRACE
+ pftrace("reading metadata bufs from primary queue for AG %d",
+ args->agno);
+#endif
+ pf_batch_read(args, PF_META_ONLY, buf);
+#ifdef XR_PF_TRACE
+ pftrace("reading bufs from secondary queue for AG %d",
+ args->agno);
+#endif
+ pf_batch_read(args, PF_SECONDARY, buf);
+ }
}
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, fsbno);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
}
+}
+
+static void *
+pf_io_worker(
+ void *param)
+{
+ prefetch_args_t *args = param;
+ void *buf = memalign(libxfs_device_alignment(),
+ pf_max_bytes);
+
+ if (buf == NULL)
+ return NULL;
- if (i > 1) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_DIR) == -1)
- do_prefetch = 0;
+ pthread_mutex_lock(&args->lock);
+ while (!args->queuing_done || args->primary_io_queue.height) {
+
+#ifdef XR_PF_TRACE
+ pftrace("waiting to start prefetch I/O for AG %d", args->agno);
+#endif
+ while (!args->can_start_reading && !args->queuing_done)
+ pthread_cond_wait(&args->start_reading, &args->lock);
+#ifdef XR_PF_TRACE
+ pftrace("starting prefetch I/O for AG %d", args->agno);
+#endif
+ pf_batch_read(args, PF_PRIMARY, buf);
+ pf_batch_read(args, PF_SECONDARY, buf);
+
+#ifdef XR_PF_TRACE
+ pftrace("ran out of bufs to prefetch for AG %d", args->agno);
+#endif
+ if (!args->queuing_done)
+ args->can_start_reading = 0;
}
+ pthread_mutex_unlock(&args->lock);
- libxfs_put_lio_buffer((void *) liop);
- return;
+ free(buf);
+
+#ifdef XR_PF_TRACE
+ pftrace("finished prefetch I/O for AG %d", args->agno);
+#endif
+ return NULL;
}
-void
-prefetch_p6_dir1(
- xfs_mount_t *mp,
- xfs_ino_t ino,
- xfs_inode_t *ip,
- xfs_dablk_t da_bno,
- xfs_fsblock_t *fblockp)
+static int
+pf_create_prefetch_thread(
+ prefetch_args_t *args);
+
+static void *
+pf_queuing_worker(
+ void *param)
{
- xfs_da_intnode_t *node;
- xfs_buf_t *bp;
- xfs_dfsbno_t fsbno;
- xfs_bmbt_irec_t map;
- int nmap;
+ prefetch_args_t *args = param;
+ int num_inos;
+ ino_tree_node_t *irec;
+ ino_tree_node_t *cur_irec;
+ int blks_per_cluster;
+ int inos_per_cluster;
+ xfs_agblock_t bno;
int i;
- int error;
-
- nmap = 1;
- error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1,
- XFS_BMAPI_METADATA, fblockp, 0,
- &map, &nmap, NULL);
- if (error || (nmap != 1)) {
- return;
+ int err;
+
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+ if (blks_per_cluster == 0)
+ blks_per_cluster = 1;
+ inos_per_cluster = blks_per_cluster * mp->m_sb.sb_inopblock;
+
+ for (i = 0; i < PF_THREAD_COUNT; i++) {
+ err = pthread_create(&args->io_threads[i], NULL,
+ pf_io_worker, args);
+ if (err != 0) {
+ do_warn(_("failed to create prefetch thread: %s\n"),
+ strerror(err));
+ if (i == 0) {
+ pf_start_processing(args);
+ return NULL;
+ }
+ /*
+ * since we have at least one I/O thread, use them for
+ * prefetch
+ */
+ break;
+ }
}
- if ((fsbno = map.br_startblock) == HOLESTARTBLOCK)
- return;
+#ifdef XR_PF_TRACE
+ pftrace("starting prefetch for AG %d", args->agno);
+#endif
- bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
- XFS_FSB_TO_BB(mp, 1), 0);
+ for (irec = findfirst_inode_rec(args->agno); irec != NULL;
+ irec = next_ino_rec(irec)) {
- if (bp == NULL)
- return;
+ cur_irec = irec;
+ num_inos = XFS_INODES_PER_CHUNK;
+ while (num_inos < XFS_IALLOC_INODES(mp) && irec != NULL) {
+ irec = next_ino_rec(irec);
+ num_inos += XFS_INODES_PER_CHUNK;
+ }
- node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp);
- if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) {
- libxfs_putbuf(bp);
- return;
+ if (args->dirs_only && cur_irec->ino_isa_dir == 0)
+ continue;
+#ifdef XR_PF_TRACE
+ sem_getvalue(&args->ra_count, &i);
+ pftrace("queuing irec %p in AG %d, sem count = %d",
+ irec, args->agno, i);
+#endif
+ sem_wait(&args->ra_count);
+
+ num_inos = 0;
+ bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum);
+
+ do {
+ pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno),
+ blks_per_cluster, B_INODE);
+ bno += blks_per_cluster;
+ num_inos += inos_per_cluster;
+ } while (num_inos < XFS_IALLOC_INODES(mp));
}
- prefetch_p6_node(mp, ip, bp);
+ pthread_mutex_lock(&args->lock);
- /* skip prefetching if next level is leaf level */
- if (INT_GET(node->hdr.level, ARCH_CONVERT) > 1) {
- for (i = 0; i < INT_GET(node->hdr.count, ARCH_CONVERT); i++) {
- (void) prefetch_p6_dir1(mp, ino, ip,
- INT_GET(node->btree[i].before, ARCH_CONVERT),
- fblockp);
- }
- }
-
- libxfs_putbuf(bp);
- return;
+#ifdef XR_PF_TRACE
+ pftrace("finished queuing inodes for AG %d (inode_bufs_queued = %d)",
+ args->agno, args->inode_bufs_queued);
+#endif
+ args->queuing_done = 1;
+ pf_start_io_workers(args);
+ pf_start_processing(args);
+ pthread_mutex_unlock(&args->lock);
+
+ /* now wait for the readers to finish */
+ for (i = 0; i < PF_THREAD_COUNT; i++)
+ if (args->io_threads[i])
+ pthread_join(args->io_threads[i], NULL);
+
+#ifdef XR_PF_TRACE
+ pftrace("prefetch for AG %d finished", args->agno);
+#endif
+ pthread_mutex_lock(&args->lock);
+
+ ASSERT(args->primary_io_queue.height == 0);
+ ASSERT(args->secondary_io_queue.height == 0);
+
+ args->prefetch_done = 1;
+ if (args->next_args)
+ pf_create_prefetch_thread(args->next_args);
+
+ pthread_mutex_unlock(&args->lock);
+
+ return NULL;
}
-#define NMAPP 4
+static int
+pf_create_prefetch_thread(
+ prefetch_args_t *args)
+{
+ int err;
+
+#ifdef XR_PF_TRACE
+ pftrace("creating queue thread for AG %d", args->agno);
+#endif
+ err = pthread_create(&args->queuing_thread, NULL,
+ pf_queuing_worker, args);
+ if (err != 0) {
+ do_warn(_("failed to create prefetch thread: %s\n"),
+ strerror(err));
+ cleanup_inode_prefetch(args);
+ }
+
+ return err == 0;
+}
void
-prefetch_p6_dir2(
- xfs_mount_t *mp,
- xfs_inode_t *ip)
+init_prefetch(
+ xfs_mount_t *pmp)
{
- xfs_fileoff_t da_bno;
- xfs_fileoff_t next_da_bno;
- int i, j;
- libxfs_lio_req_t *liop;
- xfs_fsblock_t fsb;
- int nfsb;
- int error;
-
- if ((liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_DIR)) == NULL) {
- return;
- }
- i = 0;
- for (da_bno = 0, next_da_bno = 0; next_da_bno != NULLFILEOFF; da_bno = next_da_bno) {
- if (i == libxfs_lio_dir_count)
- break;
- next_da_bno = da_bno + mp->m_dirblkfsbs - 1;
- if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK))
- break;
+ mp = pmp;
+ mp_fd = libxfs_device_to_fd(mp->m_dev);
+ pf_max_bytes = sysconf(_SC_PAGE_SIZE) << 7;
+ pf_max_bbs = pf_max_bytes >> BBSHIFT;
+ pf_max_fsbs = pf_max_bytes >> mp->m_sb.sb_blocklog;
+ pf_batch_bytes = DEF_BATCH_BYTES;
+ pf_batch_fsbs = DEF_BATCH_BYTES >> (mp->m_sb.sb_blocklog + 1);
+}
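/*
 * A worked example of the sizing above, assuming a 4 KiB page size and a
 * 4 KiB filesystem block (sb_blocklog = 12, BBSHIFT = 9):
 *
 *	pf_max_bytes   = 4096 << 7       = 512 KiB largest prefetch read
 *	pf_max_bbs     = 524288 >> 9     = 1024 basic blocks
 *	pf_max_fsbs    = 524288 >> 12    = 128 filesystem blocks
 *	pf_batch_bytes = DEF_BATCH_BYTES = 64 KiB
 *	pf_batch_fsbs  = 65536 >> 13     = 8 fs blocks per small batch
 */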
- if (mp->m_dirblkfsbs == 1) {
- if ((error = libxfs_bmapi_single(NULL, ip, XFS_DATA_FORK, &fsb, da_bno)) != 0) {
- libxfs_put_lio_buffer((void *) liop);
- do_prefetch = 0;
- do_warn("phase6 prefetch: cannot bmap single block err = %d\n", error);
- return;
- }
- if (fsb == NULLFSBLOCK) {
- libxfs_put_lio_buffer((void *) liop);
- return;
- }
+prefetch_args_t *
+start_inode_prefetch(
+ xfs_agnumber_t agno,
+ int dirs_only,
+ prefetch_args_t *prev_args)
+{
+ prefetch_args_t *args;
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, fsb);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
- i++;
- }
- else if ((nfsb = mp->m_dirblkfsbs) > 1) {
- xfs_fsblock_t firstblock;
- xfs_bmbt_irec_t map[NMAPP];
- xfs_bmbt_irec_t *mapp;
- int nmap;
-
- if (nfsb > NMAPP) {
- mapp = malloc(sizeof(*mapp) * nfsb);
- if (mapp == NULL) {
- libxfs_put_lio_buffer((void *) liop);
- do_prefetch = 0;
- do_warn("phase6 prefetch: cannot allocate mem for map\n");
- return;
- }
- }
- else {
- mapp = map;
- }
- firstblock = NULLFSBLOCK;
- nmap = nfsb;
- if ((error = libxfs_bmapi(NULL, ip, da_bno,
- nfsb,
- XFS_BMAPI_METADATA | XFS_BMAPI_AFLAG(XFS_DATA_FORK),
- &firstblock, 0, mapp, &nmap, NULL))) {
- libxfs_put_lio_buffer((void *) liop);
- do_prefetch = 0;
- do_warn("phase6 prefetch: cannot bmap err = %d\n", error);
- return;
- }
- for (j = 0; j < nmap; j++) {
- liop[i].blkno = XFS_FSB_TO_DADDR(mp, mapp[j].br_startblock);
- liop[i].len = (int)XFS_FSB_TO_BB(mp, mapp[j].br_blockcount);
- i++;
- if (i == libxfs_lio_dir_count)
- break; /* for loop */
- }
- if (mapp != map)
- free(mapp);
+ if (!do_prefetch || agno >= mp->m_sb.sb_agcount)
+ return NULL;
- }
- else {
- do_error("phase6: invalid mp->m_dirblkfsbs %d\n", mp->m_dirblkfsbs);
- }
- }
- if (i > 1) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_DIR) == -1)
- do_prefetch = 0;
+ args = calloc(1, sizeof(prefetch_args_t));
+
+ INIT_RADIX_TREE(&args->primary_io_queue, 0);
+ INIT_RADIX_TREE(&args->secondary_io_queue, 0);
+ pthread_mutex_init(&args->lock, NULL);
+ pthread_cond_init(&args->start_reading, NULL);
+ pthread_cond_init(&args->start_processing, NULL);
+ args->agno = agno;
+ args->dirs_only = dirs_only;
+
+ /*
+ * use only 1/8 of the libxfs cache as we are only counting inodes
+ * and not any other associated metadata like directories
+ */
+
+ sem_init(&args->ra_count, 0, libxfs_bcache->c_maxcount / thread_count /
+ (XFS_IALLOC_BLOCKS(mp) / (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog)) / 8);
+
+ if (!prev_args) {
+ if (!pf_create_prefetch_thread(args))
+ return NULL;
+ } else {
+ pthread_mutex_lock(&prev_args->lock);
+ if (prev_args->prefetch_done) {
+ if (!pf_create_prefetch_thread(args))
+ args = NULL;
+ } else
+ prev_args->next_args = args;
+ pthread_mutex_unlock(&prev_args->lock);
}
- libxfs_put_lio_buffer((void *) liop);
+
+ return args;
}
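/*
 * A worked example of the ra_count sizing above, with purely illustrative
 * numbers: a 32768-buffer cache (c_maxcount), thread_count = 4, 4 fs blocks
 * per inode chunk (XFS_IALLOC_BLOCKS) and 2-block inode clusters give
 *
 *	32768 / 4 / (4 / 2) / 8 = 512
 *
 * inode chunks that may be queued ahead of processing for one AG.
 */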
void
-prefetch_sb(xfs_mount_t *mp, xfs_agnumber_t agno)
+wait_for_inode_prefetch(
+ prefetch_args_t *args)
{
- libxfs_lio_req_t *liop;
-
- if ((liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_RAW)) == NULL) {
- do_prefetch = 0;
+ if (args == NULL)
return;
+
+ pthread_mutex_lock(&args->lock);
+
+ while (!args->can_start_processing) {
+#ifdef XR_PF_TRACE
+ pftrace("waiting to start processing AG %d", args->agno);
+#endif
+ pthread_cond_wait(&args->start_processing, &args->lock);
}
+#ifdef XR_PF_TRACE
+ pftrace("can start processing AG %d", args->agno);
+#endif
+ pthread_mutex_unlock(&args->lock);
+}
- liop[0].blkno = XFS_AG_DADDR(mp, agno, XFS_SB_DADDR);
- liop[1].blkno = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp));
- liop[2].blkno = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
- liop[0].len = XFS_FSS_TO_BB(mp, 1);
- liop[1].len = XFS_FSS_TO_BB(mp, 1);
- liop[2].len = XFS_FSS_TO_BB(mp, 1);
- if (libxfs_readbuf_list(mp->m_dev, 3, (void *) liop, LIBXFS_LIO_TYPE_RAW) == -1)
- do_prefetch = 0;
+void
+cleanup_inode_prefetch(
+ prefetch_args_t *args)
+{
+ if (args == NULL)
+ return;
- libxfs_put_lio_buffer((void *) liop);
+#ifdef XR_PF_TRACE
+ pftrace("waiting AG %d prefetch to finish", args->agno);
+#endif
+ if (args->queuing_thread)
+ pthread_join(args->queuing_thread, NULL);
+
+#ifdef XR_PF_TRACE
+ pftrace("AG %d prefetch done", args->agno);
+#endif
+ pthread_mutex_destroy(&args->lock);
+ pthread_cond_destroy(&args->start_reading);
+ pthread_cond_destroy(&args->start_processing);
+ sem_destroy(&args->ra_count);
+
+ free(args);
}
+#ifdef XR_PF_TRACE
+
void
-prefetch_roots(xfs_mount_t *mp, xfs_agnumber_t agno,
- xfs_agf_t *agf, xfs_agi_t *agi)
+_pftrace(const char *func, const char *msg, ...)
{
- int i;
- libxfs_lio_req_t *liop;
+ char buf[200];
+ struct timeval tv;
+ va_list args;
- if ((liop = (libxfs_lio_req_t *) libxfs_get_lio_buffer(LIBXFS_LIO_TYPE_RAW)) == NULL) {
- do_prefetch = 0;
- return;
- }
+ gettimeofday(&tv, NULL);
- i = 0;
- if (agf->agf_roots[XFS_BTNUM_BNO] != 0 &&
- verify_agbno(mp, agno, agf->agf_roots[XFS_BTNUM_BNO])) {
- liop[i].blkno = XFS_AGB_TO_DADDR(mp, agno, agf->agf_roots[XFS_BTNUM_BNO]);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
- i++;
- }
- if (agf->agf_roots[XFS_BTNUM_CNT] != 0 &&
- verify_agbno(mp, agno, agf->agf_roots[XFS_BTNUM_CNT])) {
- liop[i].blkno = XFS_AGB_TO_DADDR(mp, agno, agf->agf_roots[XFS_BTNUM_CNT]);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
- i++;
- }
- if (agi->agi_root != 0 && verify_agbno(mp, agno, agi->agi_root)) {
- liop[i].blkno = XFS_AGB_TO_DADDR(mp, agno, agi->agi_root);
- liop[i].len = XFS_FSB_TO_BB(mp, 1);
- i++;
- }
- if (i > 1) {
- if (libxfs_readbuf_list(mp->m_dev, i, (void *) liop, LIBXFS_LIO_TYPE_RAW) == -1)
- do_prefetch = 0;
- }
+ va_start(args, msg);
+ vsnprintf(buf, sizeof(buf), msg, args);
+ buf[sizeof(buf)-1] = '\0';
+ va_end(args);
- libxfs_put_lio_buffer((void *) liop);
+ fprintf(pf_trace_file, "%lu.%06lu %s: %s\n", tv.tv_sec, tv.tv_usec, func, buf);
}
+
+#endif
#ifndef _XFS_REPAIR_PREFETCH_H
#define _XFS_REPAIR_PREFETCH_H
-struct blkmap;
-struct da_bt_cursor;
-struct xfs_mount;
-
-extern int do_prefetch;
-
-struct ino_tree_node *prefetch_inode_chunks(
- struct xfs_mount *,
- xfs_agnumber_t,
- struct ino_tree_node *);
-
-extern void prefetch_dir1(
- struct xfs_mount *mp,
- xfs_dablk_t bno,
- struct da_bt_cursor *da_cursor);
-
-extern void prefetch_dir2(
- struct xfs_mount *mp,
- struct blkmap *blkmap);
-
-extern void prefetch_p6_dir1(
- struct xfs_mount *mp,
- xfs_ino_t ino,
- struct xfs_inode *ip,
- xfs_dablk_t da_bno,
- xfs_fsblock_t *fblockp);
-
-extern void prefetch_p6_dir2(
- struct xfs_mount *mp,
- struct xfs_inode *ip);
-
-extern void prefetch_sb(
- struct xfs_mount *mp,
- xfs_agnumber_t agno);
-
-extern void prefetch_roots(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- xfs_agf_t *agf,
- xfs_agi_t *agi);
+#include <semaphore.h>
+#include "incore.h"
+#include "radix-tree.h"
+
+
+extern int do_prefetch;
+
+#define PF_THREAD_COUNT 4
+
+typedef struct prefetch_args {
+ pthread_mutex_t lock;
+ pthread_t queuing_thread;
+ pthread_t io_threads[PF_THREAD_COUNT];
+ struct radix_tree_root primary_io_queue;
+ struct radix_tree_root secondary_io_queue;
+ pthread_cond_t start_reading;
+ pthread_cond_t start_processing;
+ int agno;
+ int dirs_only;
+ volatile int can_start_reading;
+ volatile int can_start_processing;
+ volatile int prefetch_done;
+ volatile int queuing_done;
+ volatile int inode_bufs_queued;
+ volatile xfs_fsblock_t last_bno_read;
+ sem_t ra_count;
+ struct prefetch_args *next_args;
+} prefetch_args_t;
+
+
+
+void
+init_prefetch(
+ xfs_mount_t *pmp);
+
+prefetch_args_t *
+start_inode_prefetch(
+ xfs_agnumber_t agno,
+ int dirs_only,
+ prefetch_args_t *prev_args);
+
+void
+wait_for_inode_prefetch(
+ prefetch_args_t *args);
+
+void
+cleanup_inode_prefetch(
+ prefetch_args_t *args);
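/*
 * Intended call sequence, mirroring traverse_ags() in phase 6; chaining
 * prev_args lets the next AG's prefetch start as soon as the previous one
 * finishes queuing:
 *
 *	prefetch_args_t *pf = start_inode_prefetch(agno, dirs_only, prev);
 *	wait_for_inode_prefetch(pf);	// blocks until processing can start
 *	... process the AG, sem_post(&pf->ra_count) per inode chunk ...
 *	cleanup_inode_prefetch(pf);	// joins the queuing thread, frees pf
 */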
+
+
+#ifdef XR_PF_TRACE
+#define pftrace(msg...) _pftrace(__FUNCTION__, ## msg)
+void _pftrace(const char *, const char *, ...);
+#endif
#endif /* _XFS_REPAIR_PREFETCH_H */
#include <libxfs.h>
-#include "progress.h"
#include "globals.h"
+#include "progress.h"
#include "err_protos.h"
#include <signal.h>
time_t start;
time_t end;
time_t duration;
- __uint64_t item_counts[4];
+ __uint64_t item_counts[4];
} phase_times_t;
static phase_times_t phase_times[8];
/*
* Specify a repeating timer that fires each MSG_INTERVAL seconds.
*/
-
+
timespec.it_value.tv_sec = msgp->interval;
timespec.it_value.tv_nsec = 0;
timespec.it_interval.tv_sec = msgp->interval;
set_progress_msg (int report, __uint64_t total)
{
- if (!do_parallel)
+ if (!ag_stride)
return (0);
if (pthread_mutex_lock(&global_msgs.mutex))
__uint64_t sum;
msg_block_t *msgp = &global_msgs;
char msgbuf[DURATION_BUF_SIZE];
-
- if (!do_parallel)
+
+ if (!ag_stride)
return 0;
if (pthread_mutex_lock(&global_msgs.mutex))
time_t now;
struct tm *tmp;
+ if (verbose > 1)
+ cache_report(stderr, "libxfs_bcache", libxfs_bcache);
+
now = time(NULL);
if (end) {
}
strcat(buf, temp);
}
-
+
}
if (length >= ONEMINUTE) {
minutes = (length - sum) / ONEMINUTE;
strcat(buf, _(", "));
strcat(buf, temp);
}
-
+
return(buf);
}
#define PROG_FMT_REBUILD_AG 9 /* Phase 5 */
#define PROG_FMT_TRAVERSAL 10 /* Phase 6 */
-#define PROG_FMT_TRAVERSSUB 11
-#define PROG_FMT_DISCONINODE 12
+#define PROG_FMT_TRAVERSSUB 11
+#define PROG_FMT_DISCONINODE 12
#define PROGRESS_FMT_CORR_LINK 13 /* Phase 7 */
#define PROGRESS_FMT_VRFY_LINK 14
extern char *duration(int val, char *buf);
extern int do_parallel;
-#define PROG_RPT_INC(a,b) if (do_parallel && prog_rpt_done) (a) += (b)
+#define PROG_RPT_INC(a,b) if (ag_stride && prog_rpt_done) (a) += (b)
#endif /* _XFS_REPAIR_PROGRESS_RPT_H_ */
--- /dev/null
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <libxfs.h>
+#include "radix-tree.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define RADIX_TREE_MAP_SHIFT 6
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#endif
+
+struct radix_tree_node {
+ unsigned int count;
+ void *slots[RADIX_TREE_MAP_SIZE];
+#ifdef RADIX_TREE_TAGS
+ unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
+#endif
+};
+
+struct radix_tree_path {
+ struct radix_tree_node *node;
+ int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH];
+
+/*
+ * Radix tree node cache.
+ */
+
+#define radix_tree_node_alloc(r) ((struct radix_tree_node *) \
+ calloc(1, sizeof(struct radix_tree_node)))
+#define radix_tree_node_free(n) free(n)
+
+#ifdef RADIX_TREE_TAGS
+
+static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ *((__uint32_t *)node->tags[tag] + (offset >> 5)) |= (1 << (offset & 31));
+}
+
+static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __uint32_t *p = (__uint32_t*)node->tags[tag] + (offset >> 5);
+ __uint32_t m = 1 << (offset & 31);
+ *p &= ~m;
+}
+
+static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ return 1 & (((const __uint32_t *)node->tags[tag])[offset >> 5] >> (offset & 31));
+}
+
+/*
+ * Returns 1 if any slot in the node has this tag set.
+ * Otherwise returns 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+ int idx;
+ for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+ if (node->tags[tag][idx])
+ return 1;
+ }
+ return 0;
+}
+
+#endif
+
+/*
+ * Return the maximum key which can be stored into a
+ * radix tree with height HEIGHT.
+ */
+static inline unsigned long radix_tree_maxindex(unsigned int height)
+{
+ return height_to_maxindex[height];
+}
+
+/*
+ * Extend a radix tree so it can store key @index.
+ */
+static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_node *node;
+ unsigned int height;
+#ifdef RADIX_TREE_TAGS
+ char tags[RADIX_TREE_MAX_TAGS];
+ int tag;
+#endif
+
+ /* Figure out what the height should be. */
+ height = root->height + 1;
+ while (index > radix_tree_maxindex(height))
+ height++;
+
+ if (root->rnode == NULL) {
+ root->height = height;
+ goto out;
+ }
+
+#ifdef RADIX_TREE_TAGS
+ /*
+ * Prepare the tag status of the top-level node for propagation
+ * into the newly-pushed top-level node(s)
+ */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ tags[tag] = 0;
+ if (any_tag_set(root->rnode, tag))
+ tags[tag] = 1;
+ }
+#endif
+ do {
+ if (!(node = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+
+ /* Increase the height. */
+ node->slots[0] = root->rnode;
+
+#ifdef RADIX_TREE_TAGS
+ /* Propagate the aggregated tag info into the new root */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tags[tag])
+ tag_set(node, tag, 0);
+ }
+#endif
+ node->count = 1;
+ root->rnode = node;
+ root->height++;
+ } while (height > root->height);
+out:
+ return 0;
+}
+
+/**
+ * radix_tree_insert - insert into a radix tree
+ * @root: radix tree root
+ * @index: index key
+ * @item: item to insert
+ *
+ * Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ struct radix_tree_node *node = NULL, *slot;
+ unsigned int height, shift;
+ int offset;
+ int error;
+
+ /* Make sure the tree is high enough. */
+ if ((!index && !root->rnode) ||
+ index > radix_tree_maxindex(root->height)) {
+ error = radix_tree_extend(root, index);
+ if (error)
+ return error;
+ }
+
+ slot = root->rnode;
+ height = root->height;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ offset = 0; /* uninitialised var warning */
+ do {
+ if (slot == NULL) {
+ /* Have to add a child node. */
+ if (!(slot = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+ if (node) {
+ node->slots[offset] = slot;
+ node->count++;
+ } else
+ root->rnode = slot;
+ }
+
+ /* Go a level down */
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ node = slot;
+ slot = node->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ } while (height > 0);
+
+ if (slot != NULL)
+ return -EEXIST;
+
+ ASSERT(node);
+ node->count++;
+ node->slots[offset] = item;
+#ifdef RADIX_TREE_TAGS
+ ASSERT(!tag_get(node, 0, offset));
+ ASSERT(!tag_get(node, 1, offset));
+#endif
+ return 0;
+}
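/*
 * Illustrative use of the insert/lookup/delete API (radix_tree_init() must
 * have been called first; INIT_RADIX_TREE() is assumed to come from the
 * accompanying radix-tree.h, as used by start_inode_prefetch()):
 */
static void radix_example(void)
{
	struct radix_tree_root tree;
	static int payload;

	INIT_RADIX_TREE(&tree, 0);
	if (radix_tree_insert(&tree, 42, &payload) != 0)
		return;				/* -EEXIST or -ENOMEM */
	ASSERT(radix_tree_lookup(&tree, 42) == &payload);
	ASSERT(radix_tree_delete(&tree, 42) == &payload);
}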
+
+static inline void **__lookup_slot(struct radix_tree_root *root,
+ unsigned long index)
+{
+ unsigned int height, shift;
+ struct radix_tree_node **slot;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return NULL;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = &root->rnode;
+
+ while (height > 0) {
+ if (*slot == NULL)
+ return NULL;
+
+ slot = (struct radix_tree_node **)
+ ((*slot)->slots +
+ ((index >> shift) & RADIX_TREE_MAP_MASK));
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ return (void **)slot;
+}
+
+/**
+ * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the slot corresponding to the position @index in the radix tree
+ * @root. This is useful for update-if-exists operations.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+ return __lookup_slot(root, index);
+}
+
+/**
+ * radix_tree_lookup - perform lookup operation on a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ void **slot;
+
+ slot = __lookup_slot(root, index);
+ return slot != NULL ? *slot : NULL;
+}
+
+/**
+ * radix_tree_lookup_first - find the first item and index key in the radix tree
+ * @root: radix tree root
+ * @index: where the first index will be placed
+ *
+ * Returns the first entry and index key in the radix tree @root.
+ */
+void *radix_tree_lookup_first(struct radix_tree_root *root, unsigned long *index)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+ unsigned long i;
+
+ height = root->height;
+ *index = 0;
+ if (height == 0)
+ return NULL;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for (; height > 1; height--) {
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL)
+ break;
+ }
+ ASSERT(i < RADIX_TREE_MAP_SIZE);
+
+ *index |= (i << shift);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL) {
+ *index |= i;
+ return slot->slots[i];
+ }
+ }
+ return NULL;
+}
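/*
 * pf_batch_read() drives its queues with this function: repeatedly take the
 * lowest-keyed entry, remove it, and act on it. A minimal drain loop over a
 * hypothetical consume() callback:
 */
extern void consume(unsigned long key, void *item);	/* hypothetical */

static void radix_drain(struct radix_tree_root *tree)
{
	unsigned long key;
	void *item;

	while ((item = radix_tree_lookup_first(tree, &key)) != NULL) {
		radix_tree_delete(tree, key);
		consume(key, item);
	}
}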
+
+#ifdef RADIX_TREE_TAGS
+
+/**
+ * radix_tree_tag_set - set a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree, from
+ * the root all the way down to the leaf node.
+ *
+ * Returns the address of the tagged item. Setting a tag on a not-present
+ * item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ return NULL;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ while (height > 0) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!tag_get(slot, tag, offset))
+ tag_set(slot, tag, offset);
+ slot = slot->slots[offset];
+ ASSERT(slot != NULL);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ return slot;
+}
+
+/**
+ * radix_tree_tag_clear - clear a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree. If
+ * this causes the leaf node to have no tags set then clear the tag in the
+ * next-to-leaf node, etc.
+ *
+ * Returns the address of the tagged item on success, else NULL; i.e. it
+ * has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_node *slot;
+ unsigned int height, shift;
+ void *ret = NULL;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+ slot = root->rnode;
+
+ while (height > 0) {
+ int offset;
+
+ if (slot == NULL)
+ goto out;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp[1].offset = offset;
+ pathp[1].node = slot;
+ slot = slot->slots[offset];
+ pathp++;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ ret = slot;
+ if (ret == NULL)
+ goto out;
+
+ do {
+ if (!tag_get(pathp->node, tag, pathp->offset))
+ goto out;
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag))
+ goto out;
+ pathp--;
+ } while (pathp->node);
+out:
+ return ret;
+}
+
+#endif
+
+static unsigned int
+__lookup(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift, height;
+ struct radix_tree_node *slot;
+ unsigned long i;
+
+ height = root->height;
+ if (height == 0)
+ goto out;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ for ( ; height > 1; height--) {
+
+ for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
+ i < RADIX_TREE_MAP_SIZE; i++) {
+ if (slot->slots[i] != NULL)
+ break;
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+
+ /* Bottom level: grab some items */
+ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+ index++;
+ if (slot->slots[i]) {
+ results[nr_found++] = slot->slots[i];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items. Places
+ * them at *@results and returns the number of items which were placed at
+ * *@results.
+ *
+ * The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup(root, results + ret, cur_index,
+ max_items - ret, &next_index);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
+
+/**
+ * radix_tree_gang_lookup_ex - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @last_index: don't lookup past this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items, starting
+ * at @first_index and stopping before @last_index, up to @max_items. Places
+ * them at *@results and returns the number of items which were placed
+ * at *@results.
+ *
+ * The implementation is naive.
+ */
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned long last_index,
+ unsigned int max_items)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items && cur_index < last_index) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup(root, results + ret, cur_index,
+ max_items - ret, &next_index);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
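/*
 * This is the variant pf_batch_read() uses: gather a run of queued buffers
 * whose keys start within pf_max_fsbs of the lowest queued block. A bounded
 * batch sketch (MAX_BATCH is illustrative):
 */
#define MAX_BATCH 128

static unsigned int radix_batch(struct radix_tree_root *tree,
		unsigned long first, unsigned long span, void **items)
{
	/* up to MAX_BATCH items with keys in [first, first + span) */
	return radix_tree_gang_lookup_ex(tree, items, first, first + span,
			MAX_BATCH);
}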
+
+#ifdef RADIX_TREE_TAGS
+
+static unsigned int
+__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index, unsigned int tag)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift;
+ unsigned int height = root->height;
+ struct radix_tree_node *slot;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = root->rnode;
+
+ while (height > 0) {
+ unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
+ if (tag_get(slot, tag, i)) {
+ ASSERT(slot->slots[i] != NULL);
+ break;
+ }
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ }
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+ height--;
+ if (height == 0) { /* Bottom level: grab some items */
+ unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+ for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+ index++;
+ if (tag_get(slot, tag, j)) {
+ ASSERT(slot->slots[j] != NULL);
+ results[nr_found++] = slot->slots[j];
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+ }
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = slot->slots[i];
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
+ * based on a tag
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Performs an index-ascending scan of the tree for present items which
+ * have the tag indexed by @tag set. Places the items at *@results and
+ * returns the number of items which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag)
+{
+ const unsigned long max_index = radix_tree_maxindex(root->height);
+ unsigned long cur_index = first_index;
+ unsigned int ret = 0;
+
+ while (ret < max_items) {
+ unsigned int nr_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ nr_found = __lookup_tag(root, results + ret, cur_index,
+ max_items - ret, &next_index, tag);
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+ return ret;
+}
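/*
 * pf_queue_io() tags metadata entries with tag 0 so pf_batch_read() can
 * later pull only those. A sketch of that pairing, assuming an otherwise
 * empty tree:
 */
static void tag_example(struct radix_tree_root *tree, unsigned long key,
		void *item)
{
	void *results[16];
	unsigned int n;

	radix_tree_insert(tree, key, item);
	radix_tree_tag_set(tree, key, 0);	/* e.g. mark as metadata */

	/* collect up to 16 items tagged 0, scanning from key 0 upwards */
	n = radix_tree_gang_lookup_tag(tree, results, 0, 16, 0);
	ASSERT(n == 1 && results[0] == item);
}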
+
+#endif
+
+/**
+ * radix_tree_shrink - shrink height of a radix tree to minimal
+ * @root: radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+ /* try to shrink tree height */
+ while (root->height > 1 &&
+ root->rnode->count == 1 &&
+ root->rnode->slots[0]) {
+ struct radix_tree_node *to_free = root->rnode;
+
+ root->rnode = to_free->slots[0];
+ root->height--;
+ /* must only free zeroed nodes into the slab */
+#ifdef RADIX_TREE_TAGS
+ tag_clear(to_free, 0, 0);
+ tag_clear(to_free, 1, 0);
+#endif
+ to_free->slots[0] = NULL;
+ to_free->count = 0;
+ radix_tree_node_free(to_free);
+ }
+}
+
+/**
+ * radix_tree_delete - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Remove the item at @index from the radix tree rooted at @root.
+ *
+ * Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+ struct radix_tree_path *orig_pathp;
+ struct radix_tree_node *slot;
+ unsigned int height, shift;
+ void *ret = NULL;
+#ifdef RADIX_TREE_TAGS
+ char tags[RADIX_TREE_MAX_TAGS];
+ int nr_cleared_tags;
+ int tag;
+#endif
+ int offset;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+ slot = root->rnode;
+
+ for ( ; height > 0; height--) {
+ if (slot == NULL)
+ goto out;
+
+ pathp++;
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp->offset = offset;
+ pathp->node = slot;
+ slot = slot->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ }
+
+ ret = slot;
+ if (ret == NULL)
+ goto out;
+
+ orig_pathp = pathp;
+
+#ifdef RADIX_TREE_TAGS
+ /*
+ * Clear all tags associated with the just-deleted item
+ */
+ nr_cleared_tags = 0;
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ tags[tag] = 1;
+ if (tag_get(pathp->node, tag, pathp->offset)) {
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (!any_tag_set(pathp->node, tag)) {
+ tags[tag] = 0;
+ nr_cleared_tags++;
+ }
+ }
+ }
+
+ for (pathp--; nr_cleared_tags && pathp->node; pathp--) {
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tags[tag])
+ continue;
+
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag)) {
+ tags[tag] = 1;
+ nr_cleared_tags--;
+ }
+ }
+ }
+#endif
+ /* Now free the nodes we do not need anymore */
+ for (pathp = orig_pathp; pathp->node; pathp--) {
+ pathp->node->slots[pathp->offset] = NULL;
+ pathp->node->count--;
+
+ if (pathp->node->count) {
+ if (pathp->node == root->rnode)
+ radix_tree_shrink(root);
+ goto out;
+ }
+
+ /* Node with zero slots in use so free it */
+ radix_tree_node_free(pathp->node);
+ }
+ root->rnode = NULL;
+ root->height = 0;
+out:
+ return ret;
+}
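+
+/*
+ * Usage sketch (names hypothetical): the return value is the deleted
+ * item itself, so lookup-and-remove is a single call:
+ *
+ *	struct my_obj *obj = radix_tree_delete(&my_tree, key);
+ *	if (obj)
+ *		free_my_obj(obj);
+ */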
+
+#ifdef RADIX_TREE_TAGS
+/**
+ * radix_tree_tagged - test whether any items in the tree are tagged
+ * @root: radix tree root
+ * @tag: tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+{
+	struct radix_tree_node *rnode;
+
+	rnode = root->rnode;
+ if (!rnode)
+ return 0;
+ return any_tag_set(rnode, tag);
+}
+#endif
+
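+/*
+ * __maxindex returns the highest index a tree of the given height can
+ * hold. As a worked example, assuming RADIX_TREE_MAP_SHIFT is 6 on a
+ * 64-bit system: __maxindex(0) = 0, __maxindex(1) = 63,
+ * __maxindex(2) = 4095; once height * RADIX_TREE_MAP_SHIFT reaches
+ * RADIX_TREE_INDEX_BITS the result saturates at ~0UL.
+ */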
+static unsigned long __maxindex(unsigned int height)
+{
+ unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
+ unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
+
+ if (tmp >= RADIX_TREE_INDEX_BITS)
+ index = ~0UL;
+ return index;
+}
+
+static void radix_tree_init_maxindex(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+ height_to_maxindex[i] = __maxindex(i);
+}
+
+void radix_tree_init(void)
+{
+ radix_tree_init_maxindex();
+}
--- /dev/null
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __XFS_SUPPORT_RADIX_TREE_H__
+#define __XFS_SUPPORT_RADIX_TREE_H__
+
+#define RADIX_TREE_TAGS
+
+struct radix_tree_root {
+ unsigned int height;
+ struct radix_tree_node *rnode;
+};
+
+#define RADIX_TREE_INIT(mask) { \
+ .height = 0, \
+ .rnode = NULL, \
+}
+
+#define RADIX_TREE(name, mask) \
+ struct radix_tree_root name = RADIX_TREE_INIT(mask)
+
+#define INIT_RADIX_TREE(root, mask) \
+do { \
+ (root)->height = 0; \
+ (root)->rnode = NULL; \
+} while (0)
+
+#ifdef RADIX_TREE_TAGS
+#define RADIX_TREE_MAX_TAGS 2
+#endif
+
+int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
+void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
+void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup_first(struct radix_tree_root *, unsigned long *);
+void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items);
+unsigned int
+radix_tree_gang_lookup_ex(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned long last_index,
+ unsigned int max_items);
+
+void radix_tree_init(void);
+
+#ifdef RADIX_TREE_TAGS
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+int radix_tree_tag_get(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag);
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag);
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
+#endif
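+
+/*
+ * Usage sketch of the basic API (names and error handling are
+ * illustrative only):
+ *
+ *	struct radix_tree_root tree;
+ *	void *item;
+ *
+ *	INIT_RADIX_TREE(&tree, 0);
+ *	if (radix_tree_insert(&tree, 42, my_item))
+ *		return;	/* out of memory, or slot already occupied */
+ *	item = radix_tree_lookup(&tree, 42);	/* -> my_item */
+ *	item = radix_tree_delete(&tree, 42);	/* -> my_item, now removed */
+ */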
+
+#endif /* __XFS_SUPPORT_RADIX_TREE_H__ */
#include "scan.h"
#include "versions.h"
#include "bmap.h"
-#include "prefetch.h"
#include "progress.h"
extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb,
agi_dirty = agf_dirty = sb_dirty = 0;
- if (do_prefetch)
- prefetch_sb(mp, agno);
-
sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
XFS_FSS_TO_BB(mp, 1), 0);
if (!sbbuf) {
scan_freelist(agf);
- if (do_prefetch)
- prefetch_roots(mp, agno, agf, agi);
-
if (INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT) != 0 &&
verify_agbno(mp, agno,
INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT)))
#include <libxfs.h>
-#include "pthread.h"
-#include "signal.h"
+#include <pthread.h>
+#include <signal.h>
#include "threads.h"
#include "err_protos.h"
#include "protos.h"
+#include "globals.h"
-int do_parallel = 1;
-int thread_count;
-
-/* A quantum of work */
-typedef struct work_s {
- struct work_s *next;
- disp_func_t *function;
- xfs_mount_t *mp;
- xfs_agnumber_t agno;
-} work_t;
-
-typedef struct work_queue_s {
- work_t *next;
- work_t *last;
- int active_threads;
- int work_count;
- pthread_cond_t mcv; /* main thread conditional variable */
- pthread_cond_t wcv; /* worker threads conditional variable */
- pthread_mutex_t mutex;
-} work_queue_t;
-
-static work_queue_t work_queue;
-static pthread_t *work_threads;
-
-static void *worker_thread(void *arg);
-
-static void
-init_workers(work_queue_t *wq, int nw)
+static void *
+worker_thread(void *arg)
{
- int err;
- pthread_mutexattr_t mtxattr;
-
- memset(wq, 0, sizeof(work_queue_t));
- wq->active_threads = nw;
-
- pthread_cond_init(&wq->mcv, NULL);
- pthread_cond_init(&wq->wcv, NULL);
- pthread_mutexattr_init(&mtxattr);
-
-#ifdef PTHREAD_MUTEX_SPINBLOCK_NP
- /* NP - Non Portable - Irix */
- if ((err = pthread_mutexattr_settype(&mtxattr,
- PTHREAD_MUTEX_SPINBLOCK_NP)) > 0) {
- do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"),
- pthread_self(), err, strerror(err));
- }
-#endif
-#ifdef PTHREAD_MUTEX_FAST_NP
- /* NP - Non Portable - Linux */
- if ((err = pthread_mutexattr_settype(&mtxattr,
- PTHREAD_MUTEX_FAST_NP)) > 0) {
- do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"),
- pthread_self(), err, strerror(err));
- }
-#endif
- if ((err = pthread_mutex_init(&wq->mutex, &mtxattr)) > 0) {
- do_error(_("init_workers: thread 0x%x: pthread_mutex_init error %d: %s\n"),
- pthread_self(), err, strerror(err));
- }
-}
+ work_queue_t *wq;
+ work_item_t *wi;
-static void
-quiesce_workers(work_queue_t *wq)
-{
- int err;
+ wq = (work_queue_t*)arg;
- if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
- do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
- pthread_self(), err, strerror(err));
- if (wq->active_threads > 0) {
- if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0)
- do_error(_("quiesce_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"),
- pthread_self(), err, strerror(err));
- }
- ASSERT(wq->active_threads == 0);
- if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
- do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
- pthread_self(), err, strerror(err));
-}
+ /*
+	 * Loop pulling work from the passed-in work queue.
+ * Check for notification to exit after every chunk of work.
+ */
+ while (1) {
+ pthread_mutex_lock(&wq->lock);
-static void
-start_workers(work_queue_t *wq, unsigned thcnt, pthread_attr_t *attrp)
-{
- int err;
- unsigned long i;
+ /*
+ * Wait for work.
+ */
+ while (wq->next_item == NULL && !wq->terminate) {
+ ASSERT(wq->item_count == 0);
+ pthread_cond_wait(&wq->wakeup, &wq->lock);
+ }
+ if (wq->next_item == NULL && wq->terminate) {
+ pthread_mutex_unlock(&wq->lock);
+ break;
+ }
- init_workers(wq, thcnt);
+ /*
+ * Dequeue work from the head of the list.
+ */
+ ASSERT(wq->item_count > 0);
+ wi = wq->next_item;
+ wq->next_item = wi->next;
+ wq->item_count--;
- if ((work_threads = (pthread_t *)malloc(sizeof(pthread_t) * thcnt)) == NULL)
- do_error(_("cannot malloc %ld bytes for work_threads array\n"),
- sizeof(pthread_t) * thcnt);
+ pthread_mutex_unlock(&wq->lock);
- /*
- ** Create worker threads
- */
- for (i = 0; i < thcnt; i++) {
- err = pthread_create(&work_threads[i], attrp, worker_thread, (void *) i);
- if(err > 0) {
- do_error(_("cannot create worker threads, status = [%d] %s\n"),
- err, strerror(err));
- }
+ (wi->function)(wi->queue, wi->agno, wi->arg);
+ free(wi);
}
- do_log(_(" - creating %d worker thread(s)\n"), thcnt);
- /*
- ** Wait for all worker threads to initialize
- */
- quiesce_workers(wq);
+ return NULL;
}
void
thread_init(void)
{
- int status;
- pthread_attr_t attr;
sigset_t blocked;
- if (do_parallel == 0)
- return;
- if (thread_count == 0)
- thread_count = 2 * libxfs_nproc();
-
- if ((status = pthread_attr_init(&attr)) != 0)
- do_error(_("status from pthread_attr_init: %d"),status);
-
- if ((status = pthread_setconcurrency(thread_count)) != 0)
- do_error(_("Status from pthread_setconcurrency(%d): %d"), thread_count, status);
-
/*
* block delivery of progress report signal to all threads
*/
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGHUP);
sigaddset(&blocked, SIGALRM);
pthread_sigmask(SIG_BLOCK, &blocked, NULL);
-
- start_workers(&work_queue, thread_count, &attr);
}
-/*
- * Dequeue from the head of the list.
- * wq->mutex held.
- */
-static work_t *
-dequeue(work_queue_t *wq)
+
+void
+create_work_queue(
+ work_queue_t *wq,
+ xfs_mount_t *mp,
+ int nworkers)
{
- work_t *wp;
+ int err;
+ int i;
- ASSERT(wq->work_count > 0);
- wp = wq->next;
- wq->next = wp->next;
- wq->work_count--;
- if (wq->next == NULL) {
- ASSERT(wq->work_count == 0);
- wq->last = NULL;
- }
- wp->next = NULL;
- return (wp);
-}
+ memset(wq, 0, sizeof(work_queue_t));
-static void *
-worker_thread(void *arg)
-{
- work_queue_t *wq;
- work_t *wp;
- int err;
- unsigned long myid;
+ pthread_cond_init(&wq->wakeup, NULL);
+ pthread_mutex_init(&wq->lock, NULL);
- wq = &work_queue;
- myid = (unsigned long) arg;
- ts_init();
- libxfs_lio_allocate();
+ wq->mp = mp;
+ wq->thread_count = nworkers;
+	wq->threads = malloc(nworkers * sizeof(pthread_t));
+	if (wq->threads == NULL)
+		do_error(_("cannot allocate worker thread array, error = [%d] %s\n"),
+			errno, strerror(errno));
+ wq->terminate = 0;
- /*
- * Loop pulling work from the global work queue.
- * Check for notification to exit after every chunk of work.
- */
- while (1) {
- if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
- do_error(_("work_thread%d: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
- myid, pthread_self(), err, strerror(err));
- /*
- * Wait for work.
- */
- while (wq->next == NULL) {
- ASSERT(wq->work_count == 0);
- /*
- * Last thread going to idle sleep must wakeup
- * the master thread. Same mutex is used to lock
- * around two different condition variables.
- */
- wq->active_threads--;
- ASSERT(wq->active_threads >= 0);
- if (!wq->active_threads) {
- if ((err = pthread_cond_signal(&wq->mcv)) > 0)
- do_error(_("work_thread%d: thread 0x%x: pthread_cond_signal error %d: %s\n"),
- myid, pthread_self(), err, strerror(err));
- }
- if ((err = pthread_cond_wait(&wq->wcv, &wq->mutex)) > 0)
- do_error(_("work_thread%d: thread 0x%x: pthread_cond_wait error %d: %s\n"),
- myid, pthread_self(), err, strerror(err));
- wq->active_threads++;
+ for (i = 0; i < nworkers; i++) {
+ err = pthread_create(&wq->threads[i], NULL, worker_thread, wq);
+ if (err != 0) {
+ do_error(_("cannot create worker threads, error = [%d] %s\n"),
+ err, strerror(err));
}
- /*
- * Dequeue work from the head of the list.
- */
- ASSERT(wq->work_count > 0);
- wp = dequeue(wq);
- if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
- do_error(_("work_thread%d: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
- myid, pthread_self(), err, strerror(err));
- /*
- * Do the work.
- */
- (wp->function)(wp->mp, wp->agno);
-
- free(wp);
}
- /* NOT REACHED */
- pthread_exit(NULL);
- return (NULL);
}
-int
-queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno)
+void
+queue_work(
+ work_queue_t *wq,
+ work_func_t func,
+ xfs_agnumber_t agno,
+ void *arg)
{
- work_queue_t *wq;
- work_t *wp;
+ work_item_t *wi;
- if (do_parallel == 0) {
- func(mp, agno);
- return 0;
- }
- wq = &work_queue;
- /*
- * Get memory for a new work structure.
- */
- if ((wp = (work_t *)memalign(8, sizeof(work_t))) == NULL)
- return (ENOMEM);
- /*
- * Initialize the new work structure.
- */
- wp->function = func;
- wp->mp = mp;
- wp->agno = agno;
+ wi = (work_item_t *)malloc(sizeof(work_item_t));
+ if (wi == NULL)
+ do_error(_("cannot allocate worker item, error = [%d] %s\n"),
+ errno, strerror(errno));
+
+ wi->function = func;
+ wi->agno = agno;
+ wi->arg = arg;
+ wi->queue = wq;
+ wi->next = NULL;
/*
* Now queue the new work structure to the work queue.
*/
- if (wq->next == NULL) {
- wq->next = wp;
+ pthread_mutex_lock(&wq->lock);
+ if (wq->next_item == NULL) {
+ wq->next_item = wi;
+ ASSERT(wq->item_count == 0);
+ pthread_cond_signal(&wq->wakeup);
} else {
- wq->last->next = wp;
+ wq->last_item->next = wi;
}
- wq->last = wp;
- wp->next = NULL;
- wq->work_count++;
-
- return (0);
+ wq->last_item = wi;
+ wq->item_count++;
+ pthread_mutex_unlock(&wq->lock);
}
void
-wait_for_workers(void)
+destroy_work_queue(
+ work_queue_t *wq)
{
- int err;
- work_queue_t *wq;
+ int i;
- if (do_parallel == 0)
- return;
- wq = &work_queue;
- if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
- do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
- pthread_self(), err, strerror(err));
+ pthread_mutex_lock(&wq->lock);
+ wq->terminate = 1;
+ pthread_mutex_unlock(&wq->lock);
- ASSERT(wq->active_threads == 0);
- if (wq->work_count > 0) {
- /* get the workers going */
- if ((err = pthread_cond_broadcast(&wq->wcv)) > 0)
- do_error(_("wait_for_workers: thread 0x%x: pthread_cond_broadcast error %d: %s\n"),
- pthread_self(), err, strerror(err));
- /* and wait for them */
- if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0)
- do_error(_("wait_for_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"),
- pthread_self(), err, strerror(err));
- }
- ASSERT(wq->active_threads == 0);
- ASSERT(wq->work_count == 0);
+ pthread_cond_broadcast(&wq->wakeup);
+
+ for (i = 0; i < wq->thread_count; i++)
+ pthread_join(wq->threads[i], NULL);
- if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
- do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
- pthread_self(), err, strerror(err));
+ free(wq->threads);
+ pthread_mutex_destroy(&wq->lock);
+ pthread_cond_destroy(&wq->wakeup);
}
#ifndef _XFS_REPAIR_THREADS_H_
#define _XFS_REPAIR_THREADS_H_
-extern int do_parallel;
-extern int thread_count;
-/*
-** locking variants - rwlock/mutex
-*/
-#define PREPAIR_RW_LOCK_ATTR PTHREAD_PROCESS_PRIVATE
-
-#define PREPAIR_RW_LOCK_ALLOC(lkp, n) \
- if (do_parallel) { \
- lkp = malloc(n*sizeof(pthread_rwlock_t)); \
- if (lkp == NULL) \
- do_error("cannot alloc %d locks\n", n); \
- /* NO RETURN */ \
- }
-#define PREPAIR_RW_LOCK_INIT(l,a) if (do_parallel) pthread_rwlock_init((l),(a))
-#define PREPAIR_RW_READ_LOCK(l) if (do_parallel) pthread_rwlock_rdlock((l))
-#define PREPAIR_RW_WRITE_LOCK(l) if (do_parallel) pthread_rwlock_wrlock((l))
-#define PREPAIR_RW_UNLOCK(l) if (do_parallel) pthread_rwlock_unlock((l))
-#define PREPAIR_RW_WRITE_LOCK_NOTEST(l) pthread_rwlock_wrlock((l))
-#define PREPAIR_RW_UNLOCK_NOTEST(l) pthread_rwlock_unlock((l))
-#define PREPAIR_RW_LOCK_DELETE(l) if (do_parallel) pthread_rwlock_destroy((l))
-
-#define PREPAIR_MTX_LOCK_INIT(m, a) if (do_parallel) pthread_mutex_init((m), (a))
-#define PREPAIR_MTX_ATTR_INIT(a) if (do_parallel) pthread_mutexattr_init((a))
-#define PREPAIR_MTX_ATTR_SET(a, l) if (do_parallel) pthread_mutexattr_settype((a), l)
-#define PREPAIR_MTX_LOCK(m) if (do_parallel) pthread_mutex_lock(m)
-#define PREPAIR_MTX_UNLOCK(m) if (do_parallel) pthread_mutex_unlock(m)
-
-
-typedef void disp_func_t(xfs_mount_t *mp, xfs_agnumber_t agno);
-extern int queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno);
-extern void wait_for_workers(void);
+void thread_init(void);
+
+struct work_queue;
+
+typedef void work_func_t(struct work_queue *, xfs_agnumber_t, void *);
+
+typedef struct work_item {
+ struct work_item *next;
+ work_func_t *function;
+ struct work_queue *queue;
+ xfs_agnumber_t agno;
+ void *arg;
+} work_item_t;
+
+typedef struct work_queue {
+ work_item_t *next_item;
+ work_item_t *last_item;
+ int item_count;
+ int thread_count;
+ pthread_t *threads;
+ xfs_mount_t *mp;
+ pthread_mutex_t lock;
+ pthread_cond_t wakeup;
+ int terminate;
+} work_queue_t;
+
+void
+create_work_queue(
+ work_queue_t *wq,
+ xfs_mount_t *mp,
+ int nworkers);
+
+void
+queue_work(
+ work_queue_t *wq,
+ work_func_t func,
+ xfs_agnumber_t agno,
+ void *arg);
+
+void
+destroy_work_queue(
+ work_queue_t *wq);
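+
+/*
+ * Typical use, sketched with a hypothetical per-AG worker function;
+ * create the queue, feed it one item per AG, then destroy it.
+ * destroy_work_queue() lets the workers drain the queue and joins
+ * them, so it doubles as the wait-for-completion point:
+ *
+ *	static void
+ *	scan_ag_worker(struct work_queue *wq, xfs_agnumber_t agno, void *arg)
+ *	{
+ *		do_some_ag_work(wq->mp, agno);
+ *	}
+ *
+ *	work_queue_t wq;
+ *	xfs_agnumber_t agno;
+ *
+ *	create_work_queue(&wq, mp, thread_count);
+ *	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+ *		queue_work(&wq, scan_ag_worker, agno, NULL);
+ *	destroy_work_queue(&wq);
+ */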
#endif /* _XFS_REPAIR_THREADS_H_ */
"ihash",
#define BHASH_SIZE 3
"bhash",
-#define PREFETCH_INO_CNT 4
- "pfino",
-#define PREFETCH_DIR_CNT 5
- "pfdir",
-#define PREFETCH_AIO_CNT 6
- "pfaio",
-#define AG_STRIDE 7
+#define AG_STRIDE 4
"ag_stride",
NULL
};
+static int ihash_option_used;
+static int bhash_option_used;
+
static void
usage(void)
{
pre_65_beta = 0;
fs_shared_allowed = 1;
ag_stride = 0;
- thread_count = 0;
- do_parallel = 0;
+ thread_count = 1;
report_interval = PROG_RPT_DEFAULT;
/*
break;
case IHASH_SIZE:
libxfs_ihash_size = (int) strtol(val, 0, 0);
+ ihash_option_used = 1;
break;
case BHASH_SIZE:
libxfs_bhash_size = (int) strtol(val, 0, 0);
- break;
- case PREFETCH_INO_CNT:
- libxfs_lio_ino_count = (int) strtol(val, 0, 0);
- break;
- case PREFETCH_DIR_CNT:
- libxfs_lio_dir_count = (int) strtol(val, 0, 0);
- break;
- case PREFETCH_AIO_CNT:
- libxfs_lio_aio_count = (int) strtol(val, 0, 0);
+ bhash_option_used = 1;
break;
case AG_STRIDE:
ag_stride = (int) strtol(val, 0, 0);
printf(_("%s version %s\n"), progname, VERSION);
exit(0);
case 'P':
- do_prefetch ^= 1;
- break;
- case 'M':
- do_parallel ^= 1;
+ do_prefetch = 0;
break;
case 't':
report_interval = (int) strtol(optarg, 0, 0);
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
+#ifdef XR_PF_TRACE
+ pf_trace_file = fopen("/tmp/xfs_repair_prefetch.trace", "w");
+ setvbuf(pf_trace_file, NULL, _IOLBF, 1024);
+#endif
+
temp_mp = &xfs_m;
setbuf(stdout, NULL);
process_args(argc, argv);
xfs_init(&x);
+ msgbuf = malloc(DURATION_BUF_SIZE);
+
timestamp(PHASE_START, 0, NULL);
timestamp(PHASE_END, 0, NULL);
inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
if (ag_stride) {
- do_parallel = 1;
- thread_count = (mp->m_sb.sb_agcount + ag_stride - 1) / ag_stride;
+ thread_count = (glob_agcount + ag_stride - 1) / ag_stride;
thread_init();
}
- if (do_parallel && report_interval) {
+ if (ag_stride && report_interval) {
init_progress_rpt();
- msgbuf = malloc(DURATION_BUF_SIZE);
if (msgbuf) {
do_log(_(" - reporting progress in intervals of %s\n"),
duration(report_interval, msgbuf));
- free(msgbuf);
}
}
+ /*
+ * Adjust libxfs cache sizes based on system memory,
+ * filesystem size and inode count.
+ *
+	 * We'll set the cache size to use three quarters of physical
+	 * memory, minus the space used by the inode AVL tree and the
+	 * block usage map.
+	 *
+	 * The inode AVL tree takes approximately 4 bytes per inode;
+	 * the block usage map currently takes 1 byte per 2 blocks.
+ *
+ * We assume most blocks will be inode clusters.
+ *
+ * Calculations are done in kilobyte units.
+ */
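+	/*
+	 * As a worked example with hypothetical round numbers: for a
+	 * filesystem with 10 million inodes, 100 million blocks and
+	 * 2GB of memory, imem is about 39000K (4 bytes per inode) and
+	 * dmem about 48800K (1 byte per 2 blocks); 3/4 of 2GB is
+	 * 1572864K, leaving roughly 1485000K for the buffer cache.
+	 */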
+
+ if (!bhash_option_used) {
+ unsigned long mem_used;
+ unsigned long phys_mem;
+
+ libxfs_icache_purge();
+ libxfs_bcache_purge();
+ cache_destroy(libxfs_icache);
+ cache_destroy(libxfs_bcache);
+
+ mem_used = (mp->m_sb.sb_icount >> (10 - 2)) +
+ (mp->m_sb.sb_dblocks >> (10 + 1));
+ phys_mem = libxfs_physmem() * 3 / 4;
+
+ if (verbose > 1)
+ do_log(_(" - icount = %llu, imem = %lu, "
+ "dblock = %llu, dmem = %lu\n"),
+ mp->m_sb.sb_icount, mp->m_sb.sb_icount >> (10 - 2),
+ mp->m_sb.sb_dblocks, mp->m_sb.sb_dblocks >> (10 + 1));
+
+ if (phys_mem <= mem_used) {
+ /*
+ * Turn off prefetch and minimise libxfs cache if
+ * physical memory is deemed insufficient
+ */
+ do_prefetch = 0;
+ libxfs_bhash_size = 64;
+ } else {
+ phys_mem -= mem_used;
+ if (phys_mem >= (1 << 30))
+ phys_mem = 1 << 30;
+ libxfs_bhash_size = phys_mem / (HASH_CACHE_RATIO *
+ (mp->m_inode_cluster_size >> 10));
+ if (libxfs_bhash_size < 512)
+ libxfs_bhash_size = 512;
+ }
+
+ if (verbose)
+ do_log(_(" - block cache size set to %d entries\n"),
+ libxfs_bhash_size * HASH_CACHE_RATIO);
+
+ if (!ihash_option_used)
+ libxfs_ihash_size = libxfs_bhash_size;
+
+ libxfs_icache = cache_init(libxfs_ihash_size,
+ &libxfs_icache_operations);
+ libxfs_bcache = cache_init(libxfs_bhash_size,
+ &libxfs_bcache_operations);
+ }
+
/*
* calculate what mkfs would do to this filesystem
*/
phase2(mp);
timestamp(PHASE_END, 2, NULL);
+ if (do_prefetch)
+ init_prefetch(mp);
+
phase3(mp);
timestamp(PHASE_END, 3, NULL);
phase4(mp);
timestamp(PHASE_END, 4, NULL);
- /* XXX: nathans - something in phase4 ain't playing by */
- /* the buffer cache rules.. why doesn't IRIX hit this? */
- libxfs_bcache_flush();
-
if (no_modify)
printf(_("No modify flag set, skipping phase 5\n"));
else {
phase6(mp);
timestamp(PHASE_END, 6, NULL);
- libxfs_bcache_flush();
-
phase7(mp);
timestamp(PHASE_END, 7, NULL);
} else {
}
}
- if (do_parallel && report_interval)
+ if (ag_stride && report_interval)
stop_progress_rpt();
if (no_modify) {
return(0);
}
- /*
- * Done, flush all cached buffers and inodes.
- */
- libxfs_icache_purge();
- libxfs_bcache_purge();
-
/*
* Clear the quota flags if they're on.
*/
libxfs_writebuf(sbp, 0);
+ /*
+	 * Done, flush all cached buffers to disk.
+ */
+ libxfs_bcache_flush();
+
libxfs_umount(mp);
if (x.rtdev)
libxfs_device_close(x.rtdev);
if (verbose)
summary_report();
do_log(_("done\n"));
+#ifdef XR_PF_TRACE
+ fclose(pf_trace_file);
+#endif
return (0);
}