From 3b6ac90313a8574a47db801dfd50512803cfaac6 Mon Sep 17 00:00:00 2001 From: Madan Valluri Date: Tue, 29 Aug 2006 00:49:38 +0000 Subject: [PATCH] Multi-thread modifications to xfs_repair. Added declaration of libxfs_nproc. --- include/libxfs.h | 1 + libxfs/darwin.c | 6 + libxfs/freebsd.c | 6 + libxfs/init.c | 6 + libxfs/init.h | 1 + libxfs/irix.c | 8 ++ libxfs/linux.c | 6 + repair/Makefile | 4 +- repair/dino_chunks.c | 18 ++- repair/dinode.c | 61 ++++++--- repair/dir_stack.c | 14 ++ repair/globals.h | 8 +- repair/incore.c | 3 + repair/incore_ext.c | 35 ++++- repair/incore_ino.c | 7 + repair/init.c | 9 +- repair/phase3.c | 24 +++- repair/phase4.c | 22 +++- repair/phase5.c | 108 ++++++++++----- repair/phase7.c | 4 +- repair/protos.h | 2 + repair/threads.c | 308 +++++++++++++++++++++++++++++++++++++++++++ repair/threads.h | 37 ++++++ repair/xfs_repair.c | 90 +++++++++++-- 24 files changed, 698 insertions(+), 90 deletions(-) create mode 100644 repair/threads.c create mode 100644 repair/threads.h diff --git a/include/libxfs.h b/include/libxfs.h index d4680dbae..f0139449a 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -577,6 +577,7 @@ typedef struct libxfs_lio_req { #define LIBXFS_LIO_TYPE_RAW 0x3 #define LIBXFS_BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) +extern int libxfs_nproc(void); #include #include diff --git a/libxfs/darwin.c b/libxfs/darwin.c index cb538d982..3a1e83334 100644 --- a/libxfs/darwin.c +++ b/libxfs/darwin.c @@ -118,3 +118,9 @@ platform_align_blockdev(void) { return (sizeof(void *)); } + +int +platform_nproc(void) +{ + return 1; +} diff --git a/libxfs/freebsd.c b/libxfs/freebsd.c index 441cb0ae9..1cbf7aaa9 100644 --- a/libxfs/freebsd.c +++ b/libxfs/freebsd.c @@ -178,3 +178,9 @@ platform_align_blockdev(void) { return (sizeof(void *)); } + +int +platform_nproc(void) +{ + return 1; +} diff --git a/libxfs/init.c b/libxfs/init.c index 7e3bb7254..5bb9ecf89 100644 --- a/libxfs/init.c +++ b/libxfs/init.c @@ -703,3 +703,9 @@ libxfs_report(FILE *fp) c = asctime(localtime(&t)); fprintf(fp, "%s", c); } + +int +libxfs_nproc(void) +{ + return platform_nproc(); +} diff --git a/libxfs/init.h b/libxfs/init.h index 38d90b901..72249926d 100644 --- a/libxfs/init.h +++ b/libxfs/init.h @@ -33,5 +33,6 @@ extern char *platform_findblockpath (char *path); extern int platform_direct_blockdev (void); extern int platform_align_blockdev (void); extern int platform_aio_init (int aio_count); +extern int platform_nproc(void); #endif /* LIBXFS_INIT_H */ diff --git a/libxfs/irix.c b/libxfs/irix.c index 9869f77cb..a3cf647f9 100644 --- a/libxfs/irix.c +++ b/libxfs/irix.c @@ -19,6 +19,7 @@ #include #include #include +#include extern char *progname; extern __int64_t findsize(char *); @@ -102,3 +103,10 @@ platform_align_blockdev(void) { return (sizeof(void *)); } + +int +platform_nproc(void) +{ + return sysmp(MP_NPROCS); +} + diff --git a/libxfs/linux.c b/libxfs/linux.c index 08befd1a3..1d0b196cf 100644 --- a/libxfs/linux.c +++ b/libxfs/linux.c @@ -207,3 +207,9 @@ platform_align_blockdev(void) return getpagesize(); return max_block_alignment; } + +int +platform_nproc(void) +{ + return sysconf(_SC_NPROCESSORS_ONLN); +} diff --git a/repair/Makefile b/repair/Makefile index f10442a29..835c0d3fb 100644 --- a/repair/Makefile +++ b/repair/Makefile @@ -9,13 +9,13 @@ LTCOMMAND = xfs_repair HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \ dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \ - scan.h versions.h prefetch.h + scan.h versions.h prefetch.h threads.h CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \ dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \ incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \ phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \ - prefetch.c scan.c versions.c xfs_repair.c + prefetch.c scan.c versions.c xfs_repair.c threads.c LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBPTHREAD) $(LIBRT) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index e8ad51c0d..0b16db669 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -26,6 +26,7 @@ #include "dir.h" #include "dinode.h" #include "prefetch.h" +#include "threads.h" #include "versions.h" /* @@ -148,16 +149,19 @@ verify_inode_chunk(xfs_mount_t *mp, if (check_inode_block(mp, ino) == 0) return(0); + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); switch (state = get_agbno_state(mp, agno, agbno)) { case XR_E_INO: do_warn( _("uncertain inode block %d/%d already known\n"), agno, agbno); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); break; case XR_E_UNKNOWN: case XR_E_FREE1: case XR_E_FREE: set_agbno_state(mp, agno, agbno, XR_E_INO); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); break; case XR_E_MULT: case XR_E_INUSE: @@ -170,12 +174,14 @@ verify_inode_chunk(xfs_mount_t *mp, _("inode block %d/%d multiply claimed, (state %d)\n"), agno, agbno, state); set_agbno_state(mp, agno, agbno, XR_E_MULT); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); return(0); default: do_warn( _("inode block %d/%d bad state, (state %d)\n"), agno, agbno, state); set_agbno_state(mp, agno, agbno, XR_E_INO); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); break; } @@ -425,6 +431,7 @@ verify_inode_chunk(xfs_mount_t *mp, * user data -- we're probably here as a result of a directory * entry or an iunlinked pointer */ + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); for (j = 0, cur_agbno = chunk_start_agbno; cur_agbno < chunk_stop_agbno; cur_agbno++) { switch (state = get_agbno_state(mp, agno, cur_agbno)) { @@ -447,9 +454,12 @@ verify_inode_chunk(xfs_mount_t *mp, break; } - if (j) + if (j) { + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); return(0); + } } + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); /* * ok, chunk is good. put the record into the tree if required, @@ -472,6 +482,7 @@ verify_inode_chunk(xfs_mount_t *mp, set_inode_used(irec_p, agino - start_agino); + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); for (cur_agbno = chunk_start_agbno; cur_agbno < chunk_stop_agbno; cur_agbno++) { switch (state = get_agbno_state(mp, agno, cur_agbno)) { @@ -501,6 +512,7 @@ verify_inode_chunk(xfs_mount_t *mp, break; } } + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); return(ino_cnt); } @@ -700,6 +712,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos, /* * mark block as an inode block in the incore bitmap */ + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); switch (state = get_agbno_state(mp, agno, agbno)) { case XR_E_INO: /* already marked */ break; @@ -717,6 +730,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos, XFS_AGB_TO_FSB(mp, agno, agbno), state); break; } + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); while (!done) { /* @@ -869,6 +883,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos, ibuf_offset = 0; agbno++; + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); switch (state = get_agbno_state(mp, agno, agbno)) { case XR_E_INO: /* already marked */ break; @@ -888,6 +903,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos, XFS_AGB_TO_FSB(mp, agno, agbno), state); break; } + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); } else if (irec_offset == XFS_INODES_PER_CHUNK) { /* diff --git a/repair/dinode.c b/repair/dinode.c index e384e3e67..c4648f240 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -30,6 +30,7 @@ #include "versions.h" #include "attr_repair.h" #include "bmap.h" +#include "threads.h" /* * inode clearing routines @@ -513,6 +514,29 @@ get_bmbt_reclist( return(NULLDFSBNO); } +/* + * process_bmbt_reclist_int is the most compute intensive + * function in repair. The following macros reduce the + * the large number of lock/unlock steps it would otherwise + * call. + */ +#define PROCESS_BMBT_DECL(type, var) type var + +#define PROCESS_BMBT_LOCK(agno) \ + if (do_parallel && (agno != locked_agno)) { \ + if (locked_agno != -1) /* release old ag lock */ \ + PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]); \ + PREPAIR_RW_WRITE_LOCK_NOTEST(&per_ag_lock[agno]); \ + locked_agno = agno; \ + } + +#define PROCESS_BMBT_UNLOCK_RETURN(val) \ + do { \ + if (locked_agno != -1) \ + PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]); \ + return (val); \ + } while (0) + /* * return 1 if inode should be cleared, 0 otherwise * if check_dups should be set to 1, that implies that @@ -552,6 +576,8 @@ process_bmbt_reclist_int( xfs_dfsbno_t e; xfs_agnumber_t agno; xfs_agblock_t agbno; + PROCESS_BMBT_DECL + (xfs_agnumber_t, locked_agno=-1); if (whichfork == XFS_DATA_FORK) forkname = _("data"); @@ -574,7 +600,7 @@ process_bmbt_reclist_int( _("bmap rec out of order, inode %llu entry %d " "[o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n"), ino, i, o, s, c, i-1, op, sp, cp); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } op = o; cp = c; @@ -587,27 +613,27 @@ process_bmbt_reclist_int( do_warn( _("zero length extent (off = %llu, fsbno = %llu) in ino %llu\n"), o, s, ino); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } if (type == XR_INO_RTDATA) { if (s >= mp->m_sb.sb_rblocks) { do_warn( _("inode %llu - bad rt extent start block number %llu, offset %llu\n"), ino, s, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } if (s + c - 1 >= mp->m_sb.sb_rblocks) { do_warn( _("inode %llu - bad rt extent last block number %llu, offset %llu\n"), ino, s + c - 1, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } if (s + c - 1 < s) { do_warn( _("inode %llu - bad rt extent overflows - start %llu, end %llu, " "offset %llu\n"), ino, s, s + c - 1, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } } else { switch (verify_dfsbno_range(mp, s, c)) { @@ -617,26 +643,26 @@ process_bmbt_reclist_int( do_warn( _("inode %llu - bad extent starting block number %llu, offset %llu\n"), ino, s, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); case XR_DFSBNORANGE_BADEND: do_warn( _("inode %llu - bad extent last block number %llu, offset %llu\n"), ino, s + c - 1, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); case XR_DFSBNORANGE_OVERFLOW: do_warn( _("inode %llu - bad extent overflows - start %llu, end %llu, " "offset %llu\n"), ino, s, s + c - 1, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } if (o >= fs_max_file_offset) { do_warn( _("inode %llu - extent offset too large - start %llu, count %llu, " "offset %llu\n"), ino, s, c, o); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } } @@ -654,7 +680,7 @@ process_bmbt_reclist_int( do_warn( _("malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n"), s, c, mp->m_sb.sb_rextsize); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } /* @@ -676,7 +702,7 @@ process_bmbt_reclist_int( _("data fork in rt ino %llu claims dup rt extent, off - %llu, " "start - %llu, count %llu\n"), ino, o, s, c); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } continue; } @@ -714,7 +740,7 @@ process_bmbt_reclist_int( do_warn( _("%s fork in rt inode %llu claims used rt block %llu\n"), forkname, ino, ext); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); case XR_E_FREE1: default: do_error( @@ -748,6 +774,7 @@ process_bmbt_reclist_int( agno = XFS_FSB_TO_AGNO(mp, s); agbno = XFS_FSB_TO_AGBNO(mp, s); e = s + c; + PROCESS_BMBT_LOCK(agno); for (b = s; b < e; b++, agbno++) { if (check_dups == 1) { /* @@ -761,7 +788,7 @@ process_bmbt_reclist_int( _("%s fork in ino %llu claims dup extent, off - %llu, " "start - %llu, cnt %llu\n"), forkname, ino, o, s, c); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } continue; } @@ -772,7 +799,7 @@ process_bmbt_reclist_int( */ if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) { if (mp->m_sb.sb_agcount < agno) - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); } /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) @@ -809,14 +836,14 @@ process_bmbt_reclist_int( do_warn( _("%s fork in inode %llu claims metadata block %llu\n"), forkname, ino, (__uint64_t) b); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); case XR_E_INUSE: case XR_E_MULT: set_agbno_state(mp, agno, agbno, XR_E_MULT); do_warn( _("%s fork in %s inode %llu claims used block %llu\n"), forkname, ftype, ino, (__uint64_t) b); - return(1); + PROCESS_BMBT_UNLOCK_RETURN(1); default: do_error( _("illegal state %d in block map %llu\n"), @@ -827,7 +854,7 @@ process_bmbt_reclist_int( *tot += c; } - return(0); + PROCESS_BMBT_UNLOCK_RETURN(0); } /* diff --git a/repair/dir_stack.c b/repair/dir_stack.c index 1995d5ada..8cf5835d6 100644 --- a/repair/dir_stack.c +++ b/repair/dir_stack.c @@ -19,6 +19,7 @@ #include #include "dir_stack.h" #include "err_protos.h" +#include "threads.h" /* * a directory stack for holding directories while @@ -29,6 +30,9 @@ static dir_stack_t dirstack_freelist; static int dirstack_init = 0; +static pthread_mutex_t dirstack_mutex; +static pthread_mutexattr_t dirstack_mutexattr; + void dir_stack_init(dir_stack_t *stack) @@ -38,6 +42,11 @@ dir_stack_init(dir_stack_t *stack) if (dirstack_init == 0) { dirstack_init = 1; + PREPAIR_MTX_ATTR_INIT(&dirstack_mutexattr); +#ifdef PTHREAD_MUTEX_SPINBLOCK_NP + PREPAIR_MTX_ATTR_SET(&dirstack_mutexattr, PTHREAD_MUTEX_SPINBLOCK_NP); +#endif + PREPAIR_MTX_LOCK_INIT(&dirstack_mutex, &dirstack_mutexattr); dir_stack_init(&dirstack_freelist); } @@ -85,8 +94,10 @@ push_dir(dir_stack_t *stack, xfs_ino_t ino) { dir_stack_elem_t *elem; + PREPAIR_MTX_LOCK(&dirstack_mutex); if (dirstack_freelist.cnt == 0) { if ((elem = malloc(sizeof(dir_stack_elem_t))) == NULL) { + PREPAIR_MTX_UNLOCK(&dirstack_mutex); do_error( _("couldn't malloc dir stack element, try more swap\n")); exit(1); @@ -94,6 +105,7 @@ push_dir(dir_stack_t *stack, xfs_ino_t ino) } else { elem = dir_stack_pop(&dirstack_freelist); } + PREPAIR_MTX_UNLOCK(&dirstack_mutex); elem->ino = ino; @@ -116,7 +128,9 @@ pop_dir(dir_stack_t *stack) ino = elem->ino; elem->ino = NULLFSINO; + PREPAIR_MTX_LOCK(&dirstack_mutex); dir_stack_push(&dirstack_freelist, elem); + PREPAIR_MTX_UNLOCK(&dirstack_mutex); return(ino); } diff --git a/repair/globals.h b/repair/globals.h index 8194facfa..f581bcf16 100644 --- a/repair/globals.h +++ b/repair/globals.h @@ -191,8 +191,10 @@ EXTERN xfs_extlen_t sb_inoalignmt; EXTERN __uint32_t sb_unit; EXTERN __uint32_t sb_width; -extern size_t ts_dirbuf_size; -extern size_t ts_dir_freemap_size; -extern size_t ts_attr_freemap_size; +extern size_t ts_dirbuf_size; +extern size_t ts_dir_freemap_size; +extern size_t ts_attr_freemap_size; + +EXTERN pthread_rwlock_t *per_ag_lock; #endif /* _XFS_REPAIR_GLOBAL_H */ diff --git a/repair/incore.c b/repair/incore.c index 2be59d20b..8598b6f1f 100644 --- a/repair/incore.c +++ b/repair/incore.c @@ -23,6 +23,7 @@ #include "agheader.h" #include "protos.h" #include "err_protos.h" +#include "threads.h" /* * push a block allocation record onto list. assumes list @@ -64,6 +65,7 @@ setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks) do_error(_("couldn't allocate block map pointers\n")); return; } + PREPAIR_RW_LOCK_ALLOC(per_ag_lock, agno); for (i = 0; i < agno; i++) { size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB), sizeof(__uint64_t)); @@ -75,6 +77,7 @@ setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks) return; } bzero(ba_bmap[i], size); + PREPAIR_RW_LOCK_INIT(&per_ag_lock[i], NULL); } if (rtblocks == 0) { diff --git a/repair/incore_ext.c b/repair/incore_ext.c index a012d780e..59a567fe6 100644 --- a/repair/incore_ext.c +++ b/repair/incore_ext.c @@ -24,6 +24,7 @@ #include "protos.h" #include "err_protos.h" #include "avl64.h" +#include "threads.h" #define ALLOC_NUM_EXTS 100 /* @@ -91,6 +92,13 @@ static avltree_desc_t **extent_bcnt_ptrs; /* static ba_rec_t *ba_list; static ba_rec_t *rt_ba_list; +/* + * locks. + */ +static pthread_rwlock_t ext_flist_lock; +static pthread_rwlock_t rt_ext_tree_lock; +static pthread_rwlock_t rt_ext_flist_lock; + /* * extent tree stuff is avl trees of duplicate extents, * sorted in order by block number. there is one tree per ag. @@ -104,6 +112,7 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock, extent_tree_node_t *new; extent_alloc_rec_t *rec; + PREPAIR_RW_WRITE_LOCK(&ext_flist_lock); if (ext_flist.cnt == 0) { ASSERT(ext_flist.list == NULL); @@ -130,6 +139,7 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock, ext_flist.list = (extent_tree_node_t *) new->avl_node.avl_nextino; ext_flist.cnt--; new->avl_node.avl_nextino = NULL; + PREPAIR_RW_UNLOCK(&ext_flist_lock); /* initialize node */ @@ -145,9 +155,11 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock, void release_extent_tree_node(extent_tree_node_t *node) { + PREPAIR_RW_WRITE_LOCK(&ext_flist_lock); node->avl_node.avl_nextino = (avlnode_t *) ext_flist.list; ext_flist.list = node; ext_flist.cnt++; + PREPAIR_RW_UNLOCK(&ext_flist_lock); return; } @@ -658,6 +670,7 @@ mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock, rt_extent_tree_node_t *new; rt_extent_alloc_rec_t *rec; + PREPAIR_RW_WRITE_LOCK(&rt_ext_flist_lock); if (rt_ext_flist.cnt == 0) { ASSERT(rt_ext_flist.list == NULL); @@ -684,6 +697,7 @@ mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock, rt_ext_flist.list = (rt_extent_tree_node_t *) new->avl_node.avl_nextino; rt_ext_flist.cnt--; new->avl_node.avl_nextino = NULL; + PREPAIR_RW_UNLOCK(&rt_ext_flist_lock); /* initialize node */ @@ -762,6 +776,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount) xfs_drtbno_t new_startblock; xfs_extlen_t new_blockcount; + PREPAIR_RW_WRITE_LOCK(&rt_ext_tree_lock); avl64_findranges(rt_ext_tree_ptr, startblock - 1, startblock + blockcount + 1, (avl64node_t **) &first, (avl64node_t **) &last); @@ -779,6 +794,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount) do_error(_("duplicate extent range\n")); } + PREPAIR_RW_UNLOCK(&rt_ext_tree_lock); return; } @@ -802,8 +818,10 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount) * just bail if the new extent is contained within an old one */ if (ext->rt_startblock <= startblock && - ext->rt_blockcount >= blockcount) + ext->rt_blockcount >= blockcount) { + PREPAIR_RW_UNLOCK(&rt_ext_tree_lock); return; + } /* * now check for overlaps and adjacent extents */ @@ -831,6 +849,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount) do_error(_("duplicate extent range\n")); } + PREPAIR_RW_UNLOCK(&rt_ext_tree_lock); return; } @@ -841,10 +860,15 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount) int search_rt_dup_extent(xfs_mount_t *mp, xfs_drtbno_t bno) { - if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL) - return(1); + int ret; - return(0); + PREPAIR_RW_READ_LOCK(&rt_ext_tree_lock); + if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL) + ret = 1; + else + ret = 0; + PREPAIR_RW_UNLOCK(&rt_ext_tree_lock); + return(ret); } static __uint64_t @@ -873,6 +897,9 @@ incore_ext_init(xfs_mount_t *mp) ba_list = NULL; rt_ba_list = NULL; + PREPAIR_RW_LOCK_INIT(&ext_flist_lock, NULL); + PREPAIR_RW_LOCK_INIT(&rt_ext_tree_lock, NULL); + PREPAIR_RW_LOCK_INIT(&rt_ext_flist_lock, NULL); if ((extent_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *))) == NULL) diff --git a/repair/incore_ino.c b/repair/incore_ino.c index 4f3e072d8..c08da769d 100644 --- a/repair/incore_ino.c +++ b/repair/incore_ino.c @@ -22,8 +22,10 @@ #include "incore.h" #include "agheader.h" #include "protos.h" +#include "threads.h" #include "err_protos.h" +static pthread_rwlock_t ino_flist_lock; extern avlnode_t *avl_firstino(avlnode_t *root); /* @@ -69,6 +71,7 @@ mk_ino_tree_nodes(xfs_agino_t starting_ino) ino_tree_node_t *new; avlnode_t *node; + PREPAIR_RW_WRITE_LOCK(&ino_flist_lock); if (ino_flist.cnt == 0) { ASSERT(ino_flist.list == NULL); @@ -92,6 +95,7 @@ mk_ino_tree_nodes(xfs_agino_t starting_ino) ino_flist.cnt--; node = &new->avl_node; node->avl_nextino = node->avl_forw = node->avl_back = NULL; + PREPAIR_RW_UNLOCK(&ino_flist_lock); /* initialize node */ @@ -115,6 +119,7 @@ free_ino_tree_node(ino_tree_node_t *ino_rec) ino_rec->avl_node.avl_forw = NULL; ino_rec->avl_node.avl_back = NULL; + PREPAIR_RW_WRITE_LOCK(&ino_flist_lock); if (ino_flist.list != NULL) { ASSERT(ino_flist.cnt > 0); ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list; @@ -132,6 +137,7 @@ free_ino_tree_node(ino_tree_node_t *ino_rec) if (ino_rec->ino_un.plist != NULL) free(ino_rec->ino_un.plist); } + PREPAIR_RW_UNLOCK(&ino_flist_lock); return; } @@ -643,6 +649,7 @@ incore_ino_init(xfs_mount_t *mp) int i; int agcount = mp->m_sb.sb_agcount; + PREPAIR_RW_LOCK_INIT(&ino_flist_lock, NULL); if ((inode_tree_ptrs = malloc(agcount * sizeof(avltree_desc_t *))) == NULL) do_error(_("couldn't malloc inode tree descriptor table\n")); diff --git a/repair/init.c b/repair/init.c index 090ebea39..2996be963 100644 --- a/repair/init.c +++ b/repair/init.c @@ -43,12 +43,17 @@ ts_alloc(pthread_key_t key, unsigned n, size_t size) } static void -ts_init(void) +ts_create(void) { /* create thread specific keys */ pthread_key_create(&dirbuf_key, NULL); pthread_key_create(&dir_freemap_key, NULL); pthread_key_create(&attr_freemap_key, NULL); +} + +void +ts_init(void) +{ /* allocate thread specific storage */ ts_alloc(dirbuf_key, 1, ts_dirbuf_size); @@ -136,6 +141,7 @@ xfs_init(libxfs_init_t *args) if (!libxfs_init(args)) do_error(_("couldn't initialize XFS library\n")); + ts_create(); ts_init(); increase_rlimit(); if (do_prefetch) { @@ -143,4 +149,5 @@ xfs_init(libxfs_init_t *args) if (do_prefetch) libxfs_lio_allocate(); } + thread_init(); } diff --git a/repair/phase3.c b/repair/phase3.c index 9f905a113..8a50bdfe8 100644 --- a/repair/phase3.c +++ b/repair/phase3.c @@ -24,6 +24,7 @@ #include "protos.h" #include "err_protos.h" #include "dinode.h" +#include "threads.h" /* * walks an unlinked list, returns 1 on an error (bogus pointer) or @@ -57,6 +58,7 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino) add_aginode_uncertain(agno, current_ino, 1); agbno = XFS_AGINO_TO_AGBNO(mp, current_ino); + PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]); switch (state = get_agbno_state(mp, agno, agbno)) { case XR_E_UNKNOWN: @@ -64,8 +66,10 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino) case XR_E_FREE1: set_agbno_state(mp, agno, agbno, XR_E_INO); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); break; case XR_E_BAD_STATE: + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); do_error(_( "bad state in block map %d\n"), state); @@ -84,6 +88,7 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino) */ set_agbno_state(mp, agno, agbno, XR_E_INO); + PREPAIR_RW_UNLOCK(&per_ag_lock[agno]); break; } } @@ -143,6 +148,17 @@ process_agi_unlinked(xfs_mount_t *mp, xfs_agnumber_t agno) libxfs_putbuf(bp); } +void +parallel_p3_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno) +{ + /* + * turn on directory processing (inode discovery) and + * attribute processing (extra_attr_check) + */ + do_log(_(" - agno = %d\n"), agno); + process_aginodes(mp, agno, 1, 0, 1); +} + void phase3(xfs_mount_t *mp) { @@ -171,13 +187,9 @@ phase3(xfs_mount_t *mp) " - process known inodes and perform inode discovery...\n")); for (i = 0; i < mp->m_sb.sb_agcount; i++) { - do_log(_(" - agno = %d\n"), i); - /* - * turn on directory processing (inode discovery) and - * attribute processing (extra_attr_check) - */ - process_aginodes(mp, i, 1, 0, 1); + queue_work(parallel_p3_process_aginodes, mp, i); } + wait_for_workers(); /* * process newly discovered inode chunks diff --git a/repair/phase4.c b/repair/phase4.c index ac37adc8b..f65b5db77 100644 --- a/repair/phase4.c +++ b/repair/phase4.c @@ -28,6 +28,7 @@ #include "bmap.h" #include "versions.h" #include "dir2.h" +#include "threads.h" /* ARGSUSED */ @@ -1118,6 +1119,18 @@ quota_sb_check(xfs_mount_t *mp) } +void +parallel_p4_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno) +{ + do_log(_(" - agno = %d\n"), agno); + process_aginodes(mp, agno, 0, 1, 0); + + /* + * now recycle the per-AG duplicate extent records + */ + release_dup_extent_tree(agno); +} + void phase4(xfs_mount_t *mp) { @@ -1325,14 +1338,9 @@ phase4(xfs_mount_t *mp) * and attribute processing is turned OFF since we did that * already in phase 3. */ - do_log(_(" - agno = %d\n"), i); - process_aginodes(mp, i, 0, 1, 0); - - /* - * now recycle the per-AG duplicate extent records - */ - release_dup_extent_tree(i); + queue_work(parallel_p4_process_aginodes, mp, i); } + wait_for_workers(); /* * free up memory used to track trealtime duplicate extents diff --git a/repair/phase5.c b/repair/phase5.c index 6c79162d2..471f2f16f 100644 --- a/repair/phase5.c +++ b/repair/phase5.c @@ -26,6 +26,7 @@ #include "dinode.h" #include "rt.h" #include "versions.h" +#include "threads.h" /* * we maintain the current slice (path from root to leaf) @@ -72,6 +73,9 @@ typedef struct bt_status { bt_stat_level_t level[XFS_BTREE_MAXLEVELS]; } bt_status_t; +static __uint64_t *sb_icount_ag; /* allocated inodes per ag */ +static __uint64_t *sb_ifree_ag; /* free inodes per ag */ +static __uint64_t *sb_fdblocks_ag; /* free data blocks per ag */ int mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno) @@ -1415,14 +1419,13 @@ keep_fsinos(xfs_mount_t *mp) } void -phase5(xfs_mount_t *mp) +phase5_function(xfs_mount_t *mp, xfs_agnumber_t agno) { __uint64_t num_inos; __uint64_t num_free_inos; bt_status_t bno_btree_curs; bt_status_t bcnt_btree_curs; bt_status_t ino_btree_curs; - xfs_agnumber_t agno; int extra_blocks = 0; uint num_freeblocks; xfs_extlen_t freeblks1; @@ -1436,35 +1439,10 @@ phase5(xfs_mount_t *mp) extern int count_bcnt_extents(xfs_agnumber_t); #endif - do_log(_("Phase 5 - rebuild AG headers and trees...\n")); - -#ifdef XR_BLD_FREE_TRACE - fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n", - XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0), - XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0) - ); - fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n", - XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1), - XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1) - ); - fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n", - XR_INOBT_BLOCK_MAXRECS(mp, 0)); - fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n", - XR_INOBT_BLOCK_MAXRECS(mp, 1)); - fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n", - XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0), - XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); - fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n", - XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1), - XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1)); -#endif - - /* - * make sure the root and realtime inodes show up allocated - */ - keep_fsinos(mp); + if (verbose) + do_log(_(" - agno = %d\n"), agno); - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + { /* * build up incore bno and bcnt extent btrees */ @@ -1503,8 +1481,8 @@ phase5(xfs_mount_t *mp) init_ino_cursor(mp, agno, &ino_btree_curs, &num_inos, &num_free_inos); - sb_icount += num_inos; - sb_ifree += num_free_inos; + sb_icount_ag[agno] += num_inos; + sb_ifree_ag[agno] += num_free_inos; num_extents = count_bno_extents_blocks(agno, &num_freeblocks); /* @@ -1512,7 +1490,7 @@ phase5(xfs_mount_t *mp) * are counted as allocated since the space trees * always have roots */ - sb_fdblocks += num_freeblocks - 2; + sb_fdblocks_ag[agno] += num_freeblocks - 2; if (num_extents == 0) { /* @@ -1554,7 +1532,7 @@ phase5(xfs_mount_t *mp) if (extra_blocks > 0) { do_warn(_("lost %d blocks in agno %d, sorry.\n"), extra_blocks, agno); - sb_fdblocks -= extra_blocks; + sb_fdblocks_ag[agno] -= extra_blocks; } bcnt_btree_curs = bno_btree_curs; @@ -1613,6 +1591,67 @@ phase5(xfs_mount_t *mp) release_agbno_extent_tree(agno); release_agbcnt_extent_tree(agno); } +} + +void +phase5(xfs_mount_t *mp) +{ + xfs_agnumber_t agno; + + do_log(_("Phase 5 - rebuild AG headers and trees...\n")); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0) + ); + fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1) + ); + fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n", + XR_INOBT_BLOCK_MAXRECS(mp, 0)); + fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n", + XR_INOBT_BLOCK_MAXRECS(mp, 1)); + fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1)); +#endif + /* + * make sure the root and realtime inodes show up allocated + */ + keep_fsinos(mp); + + /* allocate per ag counters */ + sb_icount_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t)); + if (sb_icount_ag == NULL) + do_error(_("cannot alloc sb_icount_ag buffers\n")); + + sb_ifree_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t)); + if (sb_ifree_ag == NULL) + do_error(_("cannot alloc sb_ifree_ag buffers\n")); + + sb_fdblocks_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t)); + if (sb_fdblocks_ag == NULL) + do_error(_("cannot alloc sb_fdblocks_ag buffers\n")); + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + queue_work(phase5_function, mp, agno); + } + wait_for_workers(); + + /* aggregate per ag counters */ + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + sb_icount += sb_icount_ag[agno]; + sb_ifree += sb_ifree_ag[agno]; + sb_fdblocks += sb_fdblocks_ag[agno]; + } + free(sb_icount_ag); + free(sb_ifree_ag); + free(sb_fdblocks_ag); if (mp->m_sb.sb_rblocks) { do_log( @@ -1630,4 +1669,5 @@ phase5(xfs_mount_t *mp) sync_sb(mp); bad_ino_btree = 0; + } diff --git a/repair/phase7.c b/repair/phase7.c index 5f77a9c4e..e4b958638 100644 --- a/repair/phase7.c +++ b/repair/phase7.c @@ -26,6 +26,7 @@ #include "dinode.h" #include "versions.h" #include "prefetch.h" +#include "threads.h" /* dinoc is a pointer to the IN-CORE dinode core */ void @@ -180,8 +181,9 @@ phase7_alt(xfs_mount_t *mp) libxfs_bcache_purge(); for (i = 0; i < glob_agcount; i++) { - phase7_alt_function(mp, i); + queue_work(phase7_alt_function, mp, i); } + wait_for_workers(); } void diff --git a/repair/protos.h b/repair/protos.h index a70c8f159..ce79089f4 100644 --- a/repair/protos.h +++ b/repair/protos.h @@ -44,4 +44,6 @@ char * err_string(int err_code); extern void *ts_attr_freemap(void); extern void *ts_dir_freemap(void); extern void *ts_dirbuf(void); +extern void ts_init(void); +extern void thread_init(void); diff --git a/repair/threads.c b/repair/threads.c new file mode 100644 index 000000000..6b51cfefd --- /dev/null +++ b/repair/threads.c @@ -0,0 +1,308 @@ +#include +#include "pthread.h" +#include "signal.h" +#include "threads.h" +#include "err_protos.h" +#include "protos.h" + +int do_parallel = 1; +int thread_count; + +/* A quantum of work */ +typedef struct work_s { + struct work_s *next; + disp_func_t *function; + xfs_mount_t *mp; + xfs_agnumber_t agno; +} work_t; + +typedef struct work_queue_s { + work_t *next; + work_t *last; + int active_threads; + int work_count; + pthread_cond_t mcv; /* main thread conditional variable */ + pthread_cond_t wcv; /* worker threads conditional variable */ + pthread_mutex_t mutex; +} work_queue_t; + +static work_queue_t work_queue; +static pthread_t *work_threads; + +static void *worker_thread(void *arg); + +static void +init_workers(work_queue_t *wq, int nw) +{ + int err; + pthread_mutexattr_t mtxattr; + + memset(wq, 0, sizeof(work_queue_t)); + wq->active_threads = nw; + + pthread_cond_init(&wq->mcv, NULL); + pthread_cond_init(&wq->wcv, NULL); + pthread_mutexattr_init(&mtxattr); + +#ifdef PTHREAD_MUTEX_SPINBLOCK_NP + /* NP - Non Portable - Irix */ + if ((err = pthread_mutexattr_settype(&mtxattr, + PTHREAD_MUTEX_SPINBLOCK_NP)) > 0) { + do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"), + pthread_self(), err, strerror(err)); + } +#endif +#ifdef PTHREAD_MUTEX_FAST_NP + /* NP - Non Portable - Linux */ + if ((err = pthread_mutexattr_settype(&mtxattr, + PTHREAD_MUTEX_FAST_NP)) > 0) { + do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"), + pthread_self(), err, strerror(err)); + } +#endif + if ((err = pthread_mutex_init(&wq->mutex, &mtxattr)) > 0) { + do_error(_("init_workers: thread 0x%x: pthread_mutex_init error %d: %s\n"), + pthread_self(), err, strerror(err)); + } +} + +static void +quiesce_workers(work_queue_t *wq) +{ + int err; + + if ((err = pthread_mutex_lock(&wq->mutex)) > 0) + do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"), + pthread_self(), err, strerror(err)); + if (wq->active_threads > 0) { + if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0) + do_error(_("quiesce_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"), + pthread_self(), err, strerror(err)); + } + ASSERT(wq->active_threads == 0); + if ((err = pthread_mutex_unlock(&wq->mutex)) > 0) + do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"), + pthread_self(), err, strerror(err)); +} + +static void +start_workers(work_queue_t *wq, unsigned thcnt, pthread_attr_t *attrp) +{ + int err; + unsigned long i; + + init_workers(wq, thcnt); + + if ((work_threads = (pthread_t *)malloc(sizeof(pthread_t) * thcnt)) == NULL) + do_error(_("cannot malloc %ld bytes for work_threads array\n"), + sizeof(pthread_t) * thcnt); + + /* + ** Create worker threads + */ + for (i = 0; i < thcnt; i++) { + err = pthread_create(&work_threads[i], attrp, worker_thread, (void *) i); + if(err > 0) { + do_error(_("cannot create worker threads, status = [%d] %s\n"), + err, strerror(err)); + } + } + do_log(_(" - creating %d worker thread(s)\n"), thcnt); + + /* + ** Wait for all worker threads to initialize + */ + quiesce_workers(wq); +} + +void +thread_init(void) +{ + int status; + size_t stacksize; + pthread_attr_t attr; + sigset_t blocked; + + if (do_parallel == 0) + return; + if (thread_count == 0) + thread_count = 2 * libxfs_nproc(); + + if ((status = pthread_attr_init(&attr)) != 0) + do_error(_("status from pthread_attr_init: %d"),status); + + if ((status = pthread_attr_getstacksize(&attr, &stacksize)) != 0) + do_error(_("status from pthread_attr_getstacksize: %d"), status); + + stacksize *= 4; + + if ((status = pthread_attr_setstacksize(&attr, stacksize)) != 0) + do_error(_("status from pthread_attr_setstacksize: %d"), status); + + if ((status = pthread_setconcurrency(thread_count)) != 0) + do_error(_("Status from pthread_setconcurrency(%d): %d"), thread_count, status); + + /* + * block delivery of progress report signal to all threads + */ + sigemptyset(&blocked); + sigaddset(&blocked, SIGHUP); + sigaddset(&blocked, SIGALRM); + pthread_sigmask(SIG_BLOCK, &blocked, NULL); + + start_workers(&work_queue, thread_count, &attr); +} + +/* + * Dequeue from the head of the list. + * wq->mutex held. + */ +static work_t * +dequeue(work_queue_t *wq) +{ + work_t *wp; + + ASSERT(wq->work_count > 0); + wp = wq->next; + wq->next = wp->next; + wq->work_count--; + if (wq->next == NULL) { + ASSERT(wq->work_count == 0); + wq->last = NULL; + } + wp->next = NULL; + return (wp); +} + +static void * +worker_thread(void *arg) +{ + work_queue_t *wq; + work_t *wp; + int err; + unsigned long myid; + + wq = &work_queue; + myid = (unsigned long) arg; + ts_init(); + libxfs_lio_allocate(); + + /* + * Loop pulling work from the global work queue. + * Check for notification to exit after every chunk of work. + */ + while (1) { + if ((err = pthread_mutex_lock(&wq->mutex)) > 0) + do_error(_("work_thread%d: thread 0x%x: pthread_mutex_lock error %d: %s\n"), + myid, pthread_self(), err, strerror(err)); + /* + * Wait for work. + */ + while (wq->next == NULL) { + ASSERT(wq->work_count == 0); + /* + * Last thread going to idle sleep must wakeup + * the master thread. Same mutex is used to lock + * around two different condition variables. + */ + wq->active_threads--; + ASSERT(wq->active_threads >= 0); + if (!wq->active_threads) { + if ((err = pthread_cond_signal(&wq->mcv)) > 0) + do_error(_("work_thread%d: thread 0x%x: pthread_cond_signal error %d: %s\n"), + myid, pthread_self(), err, strerror(err)); + } + if ((err = pthread_cond_wait(&wq->wcv, &wq->mutex)) > 0) + do_error(_("work_thread%d: thread 0x%x: pthread_cond_wait error %d: %s\n"), + myid, pthread_self(), err, strerror(err)); + wq->active_threads++; + } + /* + * Dequeue work from the head of the list. + */ + ASSERT(wq->work_count > 0); + wp = dequeue(wq); + if ((err = pthread_mutex_unlock(&wq->mutex)) > 0) + do_error(_("work_thread%d: thread 0x%x: pthread_mutex_unlock error %d: %s\n"), + myid, pthread_self(), err, strerror(err)); + /* + * Do the work. + */ + (wp->function)(wp->mp, wp->agno); + + free(wp); + } + /* NOT REACHED */ + pthread_exit(NULL); + return (NULL); +} + +int +queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno) +{ + work_queue_t *wq; + work_t *wp; + + if (do_parallel == 0) { + func(mp, agno); + return 0; + } + wq = &work_queue; + /* + * Get memory for a new work structure. + */ + if ((wp = (work_t *)memalign(8, sizeof(work_t))) == NULL) + return (ENOMEM); + /* + * Initialize the new work structure. + */ + wp->function = func; + wp->mp = mp; + wp->agno = agno; + + /* + * Now queue the new work structure to the work queue. + */ + if (wq->next == NULL) { + wq->next = wp; + } else { + wq->last->next = wp; + } + wq->last = wp; + wp->next = NULL; + wq->work_count++; + + return (0); +} + +void +wait_for_workers(void) +{ + int err; + work_queue_t *wq; + + if (do_parallel == 0) + return; + wq = &work_queue; + if ((err = pthread_mutex_lock(&wq->mutex)) > 0) + do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"), + pthread_self(), err, strerror(err)); + + ASSERT(wq->active_threads == 0); + if (wq->work_count > 0) { + /* get the workers going */ + if ((err = pthread_cond_broadcast(&wq->wcv)) > 0) + do_error(_("wait_for_workers: thread 0x%x: pthread_cond_broadcast error %d: %s\n"), + pthread_self(), err, strerror(err)); + /* and wait for them */ + if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0) + do_error(_("wait_for_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"), + pthread_self(), err, strerror(err)); + } + ASSERT(wq->active_threads == 0); + ASSERT(wq->work_count == 0); + + if ((err = pthread_mutex_unlock(&wq->mutex)) > 0) + do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"), + pthread_self(), err, strerror(err)); +} diff --git a/repair/threads.h b/repair/threads.h new file mode 100644 index 000000000..9356e552b --- /dev/null +++ b/repair/threads.h @@ -0,0 +1,37 @@ +#ifndef _XFS_REPAIR_THREADS_H_ +#define _XFS_REPAIR_THREADS_H_ + +extern int do_parallel; +extern int thread_count; +/* +** locking variants - rwlock/mutex +*/ +#define PREPAIR_RW_LOCK_ATTR PTHREAD_PROCESS_PRIVATE + +#define PREPAIR_RW_LOCK_ALLOC(lkp, n) \ + if (do_parallel) { \ + lkp = malloc(n*sizeof(pthread_rwlock_t)); \ + if (lkp == NULL) \ + do_error("cannot alloc %d locks\n", n); \ + /* NO RETURN */ \ + } +#define PREPAIR_RW_LOCK_INIT(l,a) if (do_parallel) pthread_rwlock_init((l),(a)) +#define PREPAIR_RW_READ_LOCK(l) if (do_parallel) pthread_rwlock_rdlock((l)) +#define PREPAIR_RW_WRITE_LOCK(l) if (do_parallel) pthread_rwlock_wrlock((l)) +#define PREPAIR_RW_UNLOCK(l) if (do_parallel) pthread_rwlock_unlock((l)) +#define PREPAIR_RW_WRITE_LOCK_NOTEST(l) pthread_rwlock_wrlock((l)) +#define PREPAIR_RW_UNLOCK_NOTEST(l) pthread_rwlock_unlock((l)) +#define PREPAIR_RW_LOCK_DELETE(l) if (do_parallel) pthread_rwlock_destroy((l)) + +#define PREPAIR_MTX_LOCK_INIT(m, a) if (do_parallel) pthread_mutex_init((m), (a)) +#define PREPAIR_MTX_ATTR_INIT(a) if (do_parallel) pthread_mutexattr_init((a)) +#define PREPAIR_MTX_ATTR_SET(a, l) if (do_parallel) pthread_mutexattr_settype((a), l) +#define PREPAIR_MTX_LOCK(m) if (do_parallel) pthread_mutex_lock(m) +#define PREPAIR_MTX_UNLOCK(m) if (do_parallel) pthread_mutex_unlock(m) + + +typedef void disp_func_t(xfs_mount_t *mp, xfs_agnumber_t agno); +extern int queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno); +extern void wait_for_workers(void); + +#endif /* _XFS_REPAIR_THREADS_H_ */ diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 20cf34718..398cedc3f 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -26,6 +26,7 @@ #include "incore.h" #include "err_protos.h" #include "prefetch.h" +#include "threads.h" #define rounddown(x, y) (((x)/(y))*(y)) @@ -63,9 +64,13 @@ char *o_opts[] = { "pfdir", #define PREFETCH_AIO_CNT 6 "pfaio", +#define THREAD_CNT 7 + "thread", NULL }; +static void print_runtime(unsigned); + static void usage(void) { @@ -187,7 +192,7 @@ process_args(int argc, char **argv) * XXX have to add suboption processing here * attributes, quotas, nlinks, aligned_inos, sb_fbits */ - while ((c = getopt(argc, argv, "o:fl:r:LnDvVdP")) != EOF) { + while ((c = getopt(argc, argv, "o:fl:r:LnDvVdPM")) != EOF) { switch (c) { case 'D': dumpcore = 1; @@ -228,6 +233,9 @@ process_args(int argc, char **argv) case PREFETCH_AIO_CNT: libxfs_lio_aio_count = (int) strtol(val, 0, 0); break; + case THREAD_CNT: + thread_count = (int) strtol(val, 0, 0); + break; default: unknown('o', val); break; @@ -255,7 +263,7 @@ process_args(int argc, char **argv) dangerously = 1; break; case 'v': - verbose = 1; + verbose++; break; case 'V': printf(_("%s version %s\n"), progname, VERSION); @@ -263,6 +271,9 @@ process_args(int argc, char **argv) case 'P': do_prefetch ^= 1; break; + case 'M': + do_parallel ^= 1; + break; case '?': usage(); } @@ -458,7 +469,9 @@ main(int argc, char **argv) xfs_sb_t *sb; xfs_buf_t *sbp; xfs_mount_t xfs_m; + time_t t, start; + start = time(NULL); progname = basename(argv[0]); setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); @@ -472,6 +485,10 @@ main(int argc, char **argv) /* do phase1 to make sure we have a superblock */ phase1(temp_mp); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } if (no_modify && primary_sb_modified) { do_warn(_("Primary superblock would have been modified.\n" @@ -522,18 +539,23 @@ main(int argc, char **argv) } /* make sure the per-ag freespace maps are ok so we can mount the fs */ - phase2(mp); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } - if (verbose) - libxfs_report(stderr); phase3(mp); - if (verbose) - libxfs_report(stderr); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } phase4(mp); - if (verbose) - libxfs_report(stderr); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } /* XXX: nathans - something in phase4 ain't playing by */ /* the buffer cache rules.. why doesn't IRIX hit this? */ @@ -541,15 +563,26 @@ main(int argc, char **argv) if (no_modify) printf(_("No modify flag set, skipping phase 5\n")); - else + else { phase5(mp); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } + } if (!bad_ino_btree) { phase6(mp); - if (verbose) - libxfs_report(stderr); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } phase7(mp); + if (verbose) { + t = time(NULL); + fprintf(stderr, asctime(localtime(&t))); + } } else { do_warn( _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n")); @@ -609,12 +642,14 @@ _("Warning: project quota information would be cleared.\n" } } - if (verbose) + if (verbose > 1) libxfs_report(stderr); if (no_modify) { do_log( _("No modify flag set, skipping filesystem flush and exiting.\n")); + if (verbose) + print_runtime(t - start); if (fs_is_dirty) return(1); @@ -661,6 +696,33 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n" libxfs_device_close(x.ddev); do_log(_("done\n")); + if (verbose) { + print_runtime(t - start); + } + return (0); +} - return(0); +static void +print_runtime(unsigned s) +{ + unsigned h, m; + + h = s / 3600; + s %= 3600; + m = s / 60; + s %= 60; + if (h) { + fprintf(stderr, "Run time %d hour%s %d minute%s %d second%s\n", + h, h > 1 ? "s" : "", + m, m != 1 ? "s" : "", + s, s != 1 ? "s" : ""); + } else if (m) { + fprintf(stderr, "Run time %d minute%s %d second%s\n", + m, m > 1 ? "s" : "", + s, s != 1 ? "s" : ""); + } + else { + fprintf(stderr, "Run time %d second%s\n", + s, s != 1 ? "s" : ""); + } } -- 2.47.2