]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
Multi-thread modifications to xfs_repair.
authorMadan Valluri <mvalluri@sgi.com>
Tue, 29 Aug 2006 00:49:38 +0000 (00:49 +0000)
committerMadan Valluri <mvalluri@sgi.com>
Tue, 29 Aug 2006 00:49:38 +0000 (00:49 +0000)
Added declaration of libxfs_nproc.

24 files changed:
include/libxfs.h
libxfs/darwin.c
libxfs/freebsd.c
libxfs/init.c
libxfs/init.h
libxfs/irix.c
libxfs/linux.c
repair/Makefile
repair/dino_chunks.c
repair/dinode.c
repair/dir_stack.c
repair/globals.h
repair/incore.c
repair/incore_ext.c
repair/incore_ino.c
repair/init.c
repair/phase3.c
repair/phase4.c
repair/phase5.c
repair/phase7.c
repair/protos.h
repair/threads.c [new file with mode: 0644]
repair/threads.h [new file with mode: 0644]
repair/xfs_repair.c

index d4680dbae65d11de9ecb0777b907728827d8c18c..f0139449a2d8f645ebfc09a65d35f40d825e271c 100644 (file)
@@ -577,6 +577,7 @@ typedef struct  libxfs_lio_req {
 #define        LIBXFS_LIO_TYPE_RAW             0x3
 
 #define LIBXFS_BBTOOFF64(bbs)  (((xfs_off_t)(bbs)) << BBSHIFT)
+extern int libxfs_nproc(void);
 
 #include <xfs/xfs_ialloc.h>
 #include <xfs/xfs_rtalloc.h>
index cb538d982cc58332db0d8bb99c1918784200906a..3a1e833341f799c1512321d561c26d3d29d03b18 100644 (file)
@@ -118,3 +118,9 @@ platform_align_blockdev(void)
 {
        return (sizeof(void *));
 }
+
+int
+platform_nproc(void)
+{
+       return 1;
+}
index 441cb0ae9654ff2c82b5ed0161db228130379815..1cbf7aaa9a2f59f5c87b51af02f80fc83cd606ff 100644 (file)
@@ -178,3 +178,9 @@ platform_align_blockdev(void)
 {
        return (sizeof(void *));
 }
+
+int
+platform_nproc(void)
+{
+       return 1;
+}
index 7e3bb72546cede048237a999a1e1d6b1308341be..5bb9ecf892d6c34c02247050ce3f699a195fd038 100644 (file)
@@ -703,3 +703,9 @@ libxfs_report(FILE *fp)
        c = asctime(localtime(&t));
        fprintf(fp, "%s", c);
 }
+
+int
+libxfs_nproc(void)
+{
+       return platform_nproc();
+}
index 38d90b90120ba43211463f287aa73bfefb3bfb88..72249926dca3f03036498e543dd77c512240d841 100644 (file)
@@ -33,5 +33,6 @@ extern char *platform_findblockpath (char *path);
 extern int platform_direct_blockdev (void);
 extern int platform_align_blockdev (void);
 extern int platform_aio_init (int aio_count);
+extern int platform_nproc(void);
 
 #endif /* LIBXFS_INIT_H */
index 9869f77cb2024b68ceb46e88abb77cd4947c4882..a3cf647f9976fe4dcb99637240945194208d794c 100644 (file)
@@ -19,6 +19,7 @@
 #include <xfs/libxfs.h>
 #include <aio.h>
 #include <diskinfo.h>
+#include <sys/sysmp.h>
 
 extern char *progname;
 extern __int64_t findsize(char *);
@@ -102,3 +103,10 @@ platform_align_blockdev(void)
 {
        return (sizeof(void *));
 }
+
+int
+platform_nproc(void)
+{
+       return sysmp(MP_NPROCS);
+}
+
index 08befd1a388bd081995e239f7c859f5c6c9f8592..1d0b196cf402a5f8c6f040ff5021de45aee2b57d 100644 (file)
@@ -207,3 +207,9 @@ platform_align_blockdev(void)
                return getpagesize();
        return max_block_alignment;
 }
+
+int
+platform_nproc(void)
+{
+       return sysconf(_SC_NPROCESSORS_ONLN);
+}
index f10442a29cf427b4ea20006c1b901f7da108c440..835c0d3fb344d92c2bcd82dc115827ccc05bb4be 100644 (file)
@@ -9,13 +9,13 @@ LTCOMMAND = xfs_repair
 
 HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \
        dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \
-       scan.h versions.h prefetch.h
+       scan.h versions.h prefetch.h threads.h
 
 CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \
        dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \
        incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
        phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \
-       prefetch.c scan.c versions.c xfs_repair.c
+       prefetch.c scan.c versions.c xfs_repair.c threads.c
 
 LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBPTHREAD) $(LIBRT)
 LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG)
index e8ad51c0de8d6b84860317b49c22e8a73f1847c3..0b16db66952d14452d238d92cf2cfe4049eef416 100644 (file)
@@ -26,6 +26,7 @@
 #include "dir.h"
 #include "dinode.h"
 #include "prefetch.h"
+#include "threads.h"
 #include "versions.h"
 
 /*
@@ -148,16 +149,19 @@ verify_inode_chunk(xfs_mount_t            *mp,
                if (check_inode_block(mp, ino) == 0)
                        return(0);
 
+               PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
                switch (state = get_agbno_state(mp, agno, agbno))  {
                case XR_E_INO:
                        do_warn(
                _("uncertain inode block %d/%d already known\n"),
                                agno, agbno);
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        break;
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
                        set_agbno_state(mp, agno, agbno, XR_E_INO);
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
@@ -170,12 +174,14 @@ verify_inode_chunk(xfs_mount_t            *mp,
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, agbno, state);
                        set_agbno_state(mp, agno, agbno, XR_E_MULT);
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        return(0);
                default:
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, agbno, state);
                        set_agbno_state(mp, agno, agbno, XR_E_INO);
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        break;
                }
 
@@ -425,6 +431,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
         * user data -- we're probably here as a result of a directory
         * entry or an iunlinked pointer
         */
+       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
        for (j = 0, cur_agbno = chunk_start_agbno;
                        cur_agbno < chunk_stop_agbno; cur_agbno++)  {
                switch (state = get_agbno_state(mp, agno, cur_agbno))  {
@@ -447,9 +454,12 @@ verify_inode_chunk(xfs_mount_t             *mp,
                        break;
                }
 
-               if (j)
+               if (j) {
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        return(0);
+               }
        }
+       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
 
        /*
         * ok, chunk is good.  put the record into the tree if required,
@@ -472,6 +482,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
 
        set_inode_used(irec_p, agino - start_agino);
 
+       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
        for (cur_agbno = chunk_start_agbno;
                        cur_agbno < chunk_stop_agbno; cur_agbno++)  {
                switch (state = get_agbno_state(mp, agno, cur_agbno))  {
@@ -501,6 +512,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                        break;
                }
        }
+       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
 
        return(ino_cnt);
 }
@@ -700,6 +712,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
        /*
         * mark block as an inode block in the incore bitmap
         */
+       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
        switch (state = get_agbno_state(mp, agno, agbno))  {
        case XR_E_INO:  /* already marked */
                break;
@@ -717,6 +730,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                        XFS_AGB_TO_FSB(mp, agno, agbno), state);
                break;
        }
+       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
 
        while (!done)  {
                /*
@@ -869,6 +883,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                        ibuf_offset = 0;
                        agbno++;
 
+                       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
                        switch (state = get_agbno_state(mp, agno, agbno))  {
                        case XR_E_INO:  /* already marked */
                                break;
@@ -888,6 +903,7 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                                        XFS_AGB_TO_FSB(mp, agno, agbno), state);
                                break;
                        }
+                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
 
                } else if (irec_offset == XFS_INODES_PER_CHUNK)  {
                        /*
index e384e3e67ccef0cbac26ce71d30bc1a40596f665..c4648f24045a90fb5c8617671f8a8ee0718d705d 100644 (file)
@@ -30,6 +30,7 @@
 #include "versions.h"
 #include "attr_repair.h"
 #include "bmap.h"
+#include "threads.h"
 
 /*
  * inode clearing routines
@@ -513,6 +514,29 @@ get_bmbt_reclist(
        return(NULLDFSBNO);
 }
 
+/*
+ * process_bmbt_reclist_int is the most compute intensive
+ * function in repair. The following macros reduce the
+ * the large number of lock/unlock steps it would otherwise
+ * call.
+ */
+#define        PROCESS_BMBT_DECL(type, var)    type var
+
+#define        PROCESS_BMBT_LOCK(agno)                                                 \
+       if (do_parallel && (agno != locked_agno)) {                             \
+               if (locked_agno != -1)  /* release old ag lock */               \
+                       PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]);    \
+               PREPAIR_RW_WRITE_LOCK_NOTEST(&per_ag_lock[agno]);               \
+               locked_agno = agno;                                             \
+       }
+
+#define        PROCESS_BMBT_UNLOCK_RETURN(val)                                         \
+       do {                                                                    \
+               if (locked_agno != -1)                                          \
+                       PREPAIR_RW_UNLOCK_NOTEST(&per_ag_lock[locked_agno]);    \
+               return (val);                                                   \
+       } while (0)
+
 /*
  * return 1 if inode should be cleared, 0 otherwise
  * if check_dups should be set to 1, that implies that
@@ -552,6 +576,8 @@ process_bmbt_reclist_int(
        xfs_dfsbno_t            e;
        xfs_agnumber_t          agno;
        xfs_agblock_t           agbno;
+       PROCESS_BMBT_DECL
+                               (xfs_agnumber_t, locked_agno=-1);
 
        if (whichfork == XFS_DATA_FORK)
                forkname = _("data");
@@ -574,7 +600,7 @@ process_bmbt_reclist_int(
        _("bmap rec out of order, inode %llu entry %d "
          "[o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n"),
                                ino, i, o, s, c, i-1, op, sp, cp);
-                       return(1);
+                       PROCESS_BMBT_UNLOCK_RETURN(1);
                }
                op = o;
                cp = c;
@@ -587,27 +613,27 @@ process_bmbt_reclist_int(
                        do_warn(
        _("zero length extent (off = %llu, fsbno = %llu) in ino %llu\n"),
                                o, s, ino);
-                       return(1);
+                       PROCESS_BMBT_UNLOCK_RETURN(1);
                }
                if (type == XR_INO_RTDATA) {
                        if (s >= mp->m_sb.sb_rblocks)  {
                                do_warn(
        _("inode %llu - bad rt extent start block number %llu, offset %llu\n"),
                                        ino, s, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
                        if (s + c - 1 >= mp->m_sb.sb_rblocks)  {
                                do_warn(
        _("inode %llu - bad rt extent last block number %llu, offset %llu\n"),
                                        ino, s + c - 1, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
                        if (s + c - 1 < s)  {
                                do_warn(
        _("inode %llu - bad rt extent overflows - start %llu, end %llu, "
          "offset %llu\n"),
                                        ino, s, s + c - 1, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
                } else  {
                        switch (verify_dfsbno_range(mp, s, c)) {
@@ -617,26 +643,26 @@ process_bmbt_reclist_int(
                                do_warn(
        _("inode %llu - bad extent starting block number %llu, offset %llu\n"),
                                        ino, s, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        case XR_DFSBNORANGE_BADEND:
                                do_warn(
        _("inode %llu - bad extent last block number %llu, offset %llu\n"),
                                        ino, s + c - 1, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        case XR_DFSBNORANGE_OVERFLOW:
                                do_warn(
 
        _("inode %llu - bad extent overflows - start %llu, end %llu, "
          "offset %llu\n"),
                                        ino, s, s + c - 1, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
                        if (o >= fs_max_file_offset)  {
                                do_warn(
        _("inode %llu - extent offset too large - start %llu, count %llu, "
          "offset %llu\n"),
                                        ino, s, c, o);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
                }
 
@@ -654,7 +680,7 @@ process_bmbt_reclist_int(
                                do_warn(
        _("malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n"),
                                        s, c, mp->m_sb.sb_rextsize);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
 
                        /*
@@ -676,7 +702,7 @@ process_bmbt_reclist_int(
        _("data fork in rt ino %llu claims dup rt extent, off - %llu, "
          "start - %llu, count %llu\n"),
                                                        ino, o, s, c);
-                                               return(1);
+                                               PROCESS_BMBT_UNLOCK_RETURN(1);
                                        }
                                        continue;
                                }
@@ -714,7 +740,7 @@ process_bmbt_reclist_int(
                                        do_warn(
        _("%s fork in rt inode %llu claims used rt block %llu\n"),
                                                forkname, ino, ext);
-                                       return(1);
+                                       PROCESS_BMBT_UNLOCK_RETURN(1);
                                case XR_E_FREE1:
                                default:
                                        do_error(
@@ -748,6 +774,7 @@ process_bmbt_reclist_int(
                agno = XFS_FSB_TO_AGNO(mp, s);
                agbno = XFS_FSB_TO_AGBNO(mp, s);
                e = s + c;
+               PROCESS_BMBT_LOCK(agno);
                for (b = s; b < e; b++, agbno++)  {
                        if (check_dups == 1)  {
                                /*
@@ -761,7 +788,7 @@ process_bmbt_reclist_int(
        _("%s fork in ino %llu claims dup extent, off - %llu, "
          "start - %llu, cnt %llu\n"),
                                                forkname, ino, o, s, c);
-                                       return(1);
+                                       PROCESS_BMBT_UNLOCK_RETURN(1);
                                }
                                continue;
                        }
@@ -772,7 +799,7 @@ process_bmbt_reclist_int(
                         */
                        if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) {
                          if (mp->m_sb.sb_agcount < agno)
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        }
 
                        /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) 
@@ -809,14 +836,14 @@ process_bmbt_reclist_int(
                                do_warn(
                        _("%s fork in inode %llu claims metadata block %llu\n"),
                                        forkname, ino, (__uint64_t) b);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        case XR_E_INUSE:
                        case XR_E_MULT:
                                set_agbno_state(mp, agno, agbno, XR_E_MULT);
                                do_warn(
                        _("%s fork in %s inode %llu claims used block %llu\n"),
                                        forkname, ftype, ino, (__uint64_t) b);
-                               return(1);
+                               PROCESS_BMBT_UNLOCK_RETURN(1);
                        default:
                                do_error(
                        _("illegal state %d in block map %llu\n"),
@@ -827,7 +854,7 @@ process_bmbt_reclist_int(
                *tot += c;
        }
 
-       return(0);
+       PROCESS_BMBT_UNLOCK_RETURN(0);
 }
 
 /*
index 1995d5adae9ec16585efa85c1c1c74f5cffe4d66..8cf5835d61fe917f1ca2edde9eddcb7bce58cd0b 100644 (file)
@@ -19,6 +19,7 @@
 #include <libxfs.h>
 #include "dir_stack.h"
 #include "err_protos.h"
+#include "threads.h"
 
 /*
  * a directory stack for holding directories while
@@ -29,6 +30,9 @@
 
 static dir_stack_t     dirstack_freelist;
 static int             dirstack_init = 0;
+static pthread_mutex_t dirstack_mutex;
+static pthread_mutexattr_t dirstack_mutexattr;
+
 
 void
 dir_stack_init(dir_stack_t *stack)
@@ -38,6 +42,11 @@ dir_stack_init(dir_stack_t *stack)
 
        if (dirstack_init == 0)  {
                dirstack_init = 1;
+               PREPAIR_MTX_ATTR_INIT(&dirstack_mutexattr);
+#ifdef PTHREAD_MUTEX_SPINBLOCK_NP
+               PREPAIR_MTX_ATTR_SET(&dirstack_mutexattr, PTHREAD_MUTEX_SPINBLOCK_NP);
+#endif
+               PREPAIR_MTX_LOCK_INIT(&dirstack_mutex, &dirstack_mutexattr);
                dir_stack_init(&dirstack_freelist);
        }
 
@@ -85,8 +94,10 @@ push_dir(dir_stack_t *stack, xfs_ino_t ino)
 {
        dir_stack_elem_t *elem;
 
+       PREPAIR_MTX_LOCK(&dirstack_mutex);
        if (dirstack_freelist.cnt == 0)  {
                if ((elem = malloc(sizeof(dir_stack_elem_t))) == NULL)  {
+                       PREPAIR_MTX_UNLOCK(&dirstack_mutex);
                        do_error(
                _("couldn't malloc dir stack element, try more swap\n"));
                        exit(1);
@@ -94,6 +105,7 @@ push_dir(dir_stack_t *stack, xfs_ino_t ino)
        } else  {
                elem = dir_stack_pop(&dirstack_freelist);
        }
+       PREPAIR_MTX_UNLOCK(&dirstack_mutex);
 
        elem->ino = ino;
 
@@ -116,7 +128,9 @@ pop_dir(dir_stack_t *stack)
        ino = elem->ino;
        elem->ino = NULLFSINO;
 
+       PREPAIR_MTX_LOCK(&dirstack_mutex);
        dir_stack_push(&dirstack_freelist, elem);
+       PREPAIR_MTX_UNLOCK(&dirstack_mutex);
 
        return(ino);
 }
index 8194facfa79036ba2c02e034c9ec0d7cf57f5a13..f581bcf166093327a63be97b16a67ab7a5674c6b 100644 (file)
@@ -191,8 +191,10 @@ EXTERN xfs_extlen_t        sb_inoalignmt;
 EXTERN __uint32_t      sb_unit;
 EXTERN __uint32_t      sb_width;
 
-extern size_t ts_dirbuf_size;
-extern size_t ts_dir_freemap_size;
-extern size_t ts_attr_freemap_size;
+extern size_t          ts_dirbuf_size;
+extern size_t          ts_dir_freemap_size;
+extern size_t          ts_attr_freemap_size;
+
+EXTERN pthread_rwlock_t        *per_ag_lock;
 
 #endif /* _XFS_REPAIR_GLOBAL_H */
index 2be59d20bcb8e16e8187d7a558f5d1b3790a8a0e..8598b6f1f60a3f59387a8acb7691a0d0f00b18c7 100644 (file)
@@ -23,6 +23,7 @@
 #include "agheader.h"
 #include "protos.h"
 #include "err_protos.h"
+#include "threads.h"
 
 /*
  * push a block allocation record onto list.  assumes list
@@ -64,6 +65,7 @@ setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
                do_error(_("couldn't allocate block map pointers\n"));
                return;
        }
+       PREPAIR_RW_LOCK_ALLOC(per_ag_lock, agno);
        for (i = 0; i < agno; i++)  {
                size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB),
                                sizeof(__uint64_t));
@@ -75,6 +77,7 @@ setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks)
                        return;
                }
                bzero(ba_bmap[i], size);
+               PREPAIR_RW_LOCK_INIT(&per_ag_lock[i], NULL);
        }
 
        if (rtblocks == 0)  {
index a012d780e170ba625d07aa255661350bf003a456..59a567fe68e8737ace41d826e0df9c614829b942 100644 (file)
@@ -24,6 +24,7 @@
 #include "protos.h"
 #include "err_protos.h"
 #include "avl64.h"
+#include "threads.h"
 #define ALLOC_NUM_EXTS         100
 
 /*
@@ -91,6 +92,13 @@ static avltree_desc_t        **extent_bcnt_ptrs;     /*
 static ba_rec_t                *ba_list;
 static ba_rec_t                *rt_ba_list;
 
+/*
+ * locks.
+ */
+static pthread_rwlock_t ext_flist_lock;
+static pthread_rwlock_t rt_ext_tree_lock;
+static pthread_rwlock_t rt_ext_flist_lock;
+
 /*
  * extent tree stuff is avl trees of duplicate extents,
  * sorted in order by block number.  there is one tree per ag.
@@ -104,6 +112,7 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock,
        extent_tree_node_t *new;
        extent_alloc_rec_t *rec;
 
+       PREPAIR_RW_WRITE_LOCK(&ext_flist_lock);
        if (ext_flist.cnt == 0)  {
                ASSERT(ext_flist.list == NULL);
 
@@ -130,6 +139,7 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock,
        ext_flist.list = (extent_tree_node_t *) new->avl_node.avl_nextino;
        ext_flist.cnt--;
        new->avl_node.avl_nextino = NULL;
+       PREPAIR_RW_UNLOCK(&ext_flist_lock);
 
        /* initialize node */
 
@@ -145,9 +155,11 @@ mk_extent_tree_nodes(xfs_agblock_t new_startblock,
 void
 release_extent_tree_node(extent_tree_node_t *node)
 {
+       PREPAIR_RW_WRITE_LOCK(&ext_flist_lock);
        node->avl_node.avl_nextino = (avlnode_t *) ext_flist.list;
        ext_flist.list = node;
        ext_flist.cnt++;
+       PREPAIR_RW_UNLOCK(&ext_flist_lock);
 
        return;
 }
@@ -658,6 +670,7 @@ mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock,
        rt_extent_tree_node_t *new;
        rt_extent_alloc_rec_t *rec;
 
+       PREPAIR_RW_WRITE_LOCK(&rt_ext_flist_lock);
        if (rt_ext_flist.cnt == 0)  {
                ASSERT(rt_ext_flist.list == NULL);
 
@@ -684,6 +697,7 @@ mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock,
        rt_ext_flist.list = (rt_extent_tree_node_t *) new->avl_node.avl_nextino;
        rt_ext_flist.cnt--;
        new->avl_node.avl_nextino = NULL;
+       PREPAIR_RW_UNLOCK(&rt_ext_flist_lock);
 
        /* initialize node */
 
@@ -762,6 +776,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
        xfs_drtbno_t new_startblock;
        xfs_extlen_t new_blockcount;
 
+       PREPAIR_RW_WRITE_LOCK(&rt_ext_tree_lock);
        avl64_findranges(rt_ext_tree_ptr, startblock - 1,
                startblock + blockcount + 1,
                (avl64node_t **) &first, (avl64node_t **) &last);
@@ -779,6 +794,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
                        do_error(_("duplicate extent range\n"));
                }
 
+               PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
                return;
        }
 
@@ -802,8 +818,10 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
                 * just bail if the new extent is contained within an old one
                 */
                if (ext->rt_startblock <= startblock &&
-                               ext->rt_blockcount >= blockcount)
+                               ext->rt_blockcount >= blockcount) {
+                       PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
                        return;
+               }
                /*
                 * now check for overlaps and adjacent extents
                 */
@@ -831,6 +849,7 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
                do_error(_("duplicate extent range\n"));
        }
 
+       PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
        return;
 }
 
@@ -841,10 +860,15 @@ add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
 int
 search_rt_dup_extent(xfs_mount_t *mp, xfs_drtbno_t bno)
 {
-       if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL)
-               return(1);
+       int ret;
 
-       return(0);
+       PREPAIR_RW_READ_LOCK(&rt_ext_tree_lock);
+       if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL)
+               ret = 1;
+       else
+               ret = 0;
+       PREPAIR_RW_UNLOCK(&rt_ext_tree_lock);
+       return(ret);
 }
 
 static __uint64_t
@@ -873,6 +897,9 @@ incore_ext_init(xfs_mount_t *mp)
 
        ba_list = NULL;
        rt_ba_list = NULL;
+       PREPAIR_RW_LOCK_INIT(&ext_flist_lock, NULL);
+       PREPAIR_RW_LOCK_INIT(&rt_ext_tree_lock, NULL);
+       PREPAIR_RW_LOCK_INIT(&rt_ext_flist_lock, NULL);
 
        if ((extent_tree_ptrs = malloc(agcount *
                                        sizeof(avltree_desc_t *))) == NULL)
index 4f3e072d8736b3ce80449099390acbb3abf40cfb..c08da769dadce87f3c1b8d7823c20e6f9aee8bd4 100644 (file)
 #include "incore.h"
 #include "agheader.h"
 #include "protos.h"
+#include "threads.h"
 #include "err_protos.h"
 
+static pthread_rwlock_t ino_flist_lock;
 extern avlnode_t       *avl_firstino(avlnode_t *root);
 
 /*
@@ -69,6 +71,7 @@ mk_ino_tree_nodes(xfs_agino_t starting_ino)
        ino_tree_node_t *new;
        avlnode_t *node;
 
+       PREPAIR_RW_WRITE_LOCK(&ino_flist_lock);
        if (ino_flist.cnt == 0)  {
                ASSERT(ino_flist.list == NULL);
 
@@ -92,6 +95,7 @@ mk_ino_tree_nodes(xfs_agino_t starting_ino)
        ino_flist.cnt--;
        node = &new->avl_node;
        node->avl_nextino = node->avl_forw = node->avl_back = NULL;
+       PREPAIR_RW_UNLOCK(&ino_flist_lock);
 
        /* initialize node */
 
@@ -115,6 +119,7 @@ free_ino_tree_node(ino_tree_node_t *ino_rec)
        ino_rec->avl_node.avl_forw = NULL;
        ino_rec->avl_node.avl_back = NULL;
 
+       PREPAIR_RW_WRITE_LOCK(&ino_flist_lock);
        if (ino_flist.list != NULL)  {
                ASSERT(ino_flist.cnt > 0);
                ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list;
@@ -132,6 +137,7 @@ free_ino_tree_node(ino_tree_node_t *ino_rec)
                if (ino_rec->ino_un.plist != NULL)
                        free(ino_rec->ino_un.plist);
        }
+       PREPAIR_RW_UNLOCK(&ino_flist_lock);
 
        return;
 }
@@ -643,6 +649,7 @@ incore_ino_init(xfs_mount_t *mp)
        int i;
        int agcount = mp->m_sb.sb_agcount;
 
+       PREPAIR_RW_LOCK_INIT(&ino_flist_lock, NULL);
        if ((inode_tree_ptrs = malloc(agcount *
                                        sizeof(avltree_desc_t *))) == NULL)
                do_error(_("couldn't malloc inode tree descriptor table\n"));
index 090ebea398179e2985e7ced0899995ee0ea8798f..2996be96371cb16dfa4f61b4618de072105132ee 100644 (file)
@@ -43,12 +43,17 @@ ts_alloc(pthread_key_t key, unsigned n, size_t size)
 }
 
 static void
-ts_init(void)
+ts_create(void)
 {
        /* create thread specific keys */
        pthread_key_create(&dirbuf_key, NULL);
        pthread_key_create(&dir_freemap_key, NULL);
        pthread_key_create(&attr_freemap_key, NULL);
+}
+
+void
+ts_init(void)
+{
 
        /* allocate thread specific storage */
        ts_alloc(dirbuf_key, 1, ts_dirbuf_size);
@@ -136,6 +141,7 @@ xfs_init(libxfs_init_t *args)
        if (!libxfs_init(args))
                do_error(_("couldn't initialize XFS library\n"));
 
+       ts_create();
        ts_init();
        increase_rlimit();
        if (do_prefetch) {
@@ -143,4 +149,5 @@ xfs_init(libxfs_init_t *args)
                if (do_prefetch)
                        libxfs_lio_allocate();
        }
+       thread_init();
 }
index 9f905a113e5321c86af954b331b16f3b3f568a5b..8a50bdfe8d6b5dd5d00bfc52f0af356d8e4e29c8 100644 (file)
@@ -24,6 +24,7 @@
 #include "protos.h"
 #include "err_protos.h"
 #include "dinode.h"
+#include "threads.h"
 
 /*
  * walks an unlinked list, returns 1 on an error (bogus pointer) or
@@ -57,6 +58,7 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
                                add_aginode_uncertain(agno, current_ino, 1);
                                agbno = XFS_AGINO_TO_AGBNO(mp, current_ino);
 
+                               PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
                                switch (state = get_agbno_state(mp,
                                                        agno, agbno))  {
                                case XR_E_UNKNOWN:
@@ -64,8 +66,10 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
                                case XR_E_FREE1:
                                        set_agbno_state(mp, agno, agbno,
                                                XR_E_INO);
+                                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                                        break;
                                case XR_E_BAD_STATE:
+                                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                                        do_error(_(
                                                "bad state in block map %d\n"),
                                                state);
@@ -84,6 +88,7 @@ walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino)
                                         */
                                        set_agbno_state(mp, agno, agbno,
                                                XR_E_INO);
+                                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                                        break;
                                }
                        }
@@ -143,6 +148,17 @@ process_agi_unlinked(xfs_mount_t *mp, xfs_agnumber_t agno)
                libxfs_putbuf(bp);
 }
 
+void
+parallel_p3_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       /*
+        * turn on directory processing (inode discovery) and
+        * attribute processing (extra_attr_check)
+        */
+       do_log(_("        - agno = %d\n"), agno);
+       process_aginodes(mp, agno, 1, 0, 1);
+}
+
 void
 phase3(xfs_mount_t *mp)
 {
@@ -171,13 +187,9 @@ phase3(xfs_mount_t *mp)
            "        - process known inodes and perform inode discovery...\n"));
 
        for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
-               do_log(_("        - agno = %d\n"), i);
-               /*
-                * turn on directory processing (inode discovery) and
-                * attribute processing (extra_attr_check)
-                */
-               process_aginodes(mp, i, 1, 0, 1);
+               queue_work(parallel_p3_process_aginodes, mp, i);
        }
+       wait_for_workers();
 
        /*
         * process newly discovered inode chunks
index ac37adc8b7d4169b58db7c5ea652f21138cd4131..f65b5db7765a91b0a858d81addd48086af7eea02 100644 (file)
@@ -28,6 +28,7 @@
 #include "bmap.h"
 #include "versions.h"
 #include "dir2.h"
+#include "threads.h"
 
 
 /* ARGSUSED */
@@ -1118,6 +1119,18 @@ quota_sb_check(xfs_mount_t *mp)
 }
 
 
+void
+parallel_p4_process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       do_log(_("        - agno = %d\n"), agno);
+       process_aginodes(mp, agno, 0, 1, 0);
+
+       /*
+        * now recycle the per-AG duplicate extent records
+        */
+       release_dup_extent_tree(agno);
+}
+
 void
 phase4(xfs_mount_t *mp)
 {
@@ -1325,14 +1338,9 @@ phase4(xfs_mount_t *mp)
                 * and attribute processing is turned OFF since we did that
                 * already in phase 3.
                 */
-               do_log(_("        - agno = %d\n"), i);
-               process_aginodes(mp, i, 0, 1, 0);
-
-               /*
-                * now recycle the per-AG duplicate extent records
-                */
-               release_dup_extent_tree(i);
+               queue_work(parallel_p4_process_aginodes, mp, i);
        }
+       wait_for_workers();
 
        /*
         * free up memory used to track trealtime duplicate extents
index 6c79162d2b78bea70141912bd9f4118162229f99..471f2f16fa227458fe3827d43c79d768b472db65 100644 (file)
@@ -26,6 +26,7 @@
 #include "dinode.h"
 #include "rt.h"
 #include "versions.h"
+#include "threads.h"
 
 /*
  * we maintain the current slice (path from root to leaf)
@@ -72,6 +73,9 @@ typedef struct bt_status  {
        bt_stat_level_t         level[XFS_BTREE_MAXLEVELS];
 } bt_status_t;
 
+static __uint64_t      *sb_icount_ag;          /* allocated inodes per ag */
+static __uint64_t      *sb_ifree_ag;           /* free inodes per ag */
+static __uint64_t      *sb_fdblocks_ag;        /* free data blocks per ag */
 
 int
 mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno)
@@ -1415,14 +1419,13 @@ keep_fsinos(xfs_mount_t *mp)
 }
 
 void
-phase5(xfs_mount_t *mp)
+phase5_function(xfs_mount_t *mp, xfs_agnumber_t agno)
 {
        __uint64_t      num_inos;
        __uint64_t      num_free_inos;
        bt_status_t     bno_btree_curs;
        bt_status_t     bcnt_btree_curs;
        bt_status_t     ino_btree_curs;
-       xfs_agnumber_t  agno;
        int             extra_blocks = 0;
        uint            num_freeblocks;
        xfs_extlen_t    freeblks1;
@@ -1436,35 +1439,10 @@ phase5(xfs_mount_t *mp)
        extern int      count_bcnt_extents(xfs_agnumber_t);
 #endif
 
-       do_log(_("Phase 5 - rebuild AG headers and trees...\n"));
-
-#ifdef XR_BLD_FREE_TRACE
-       fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n",
-               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0),
-               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)
-               );
-       fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
-               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1),
-               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1)
-               );
-       fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n",
-               XR_INOBT_BLOCK_MAXRECS(mp, 0));
-       fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n",
-               XR_INOBT_BLOCK_MAXRECS(mp, 1));
-       fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n",
-               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0),
-               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
-       fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
-               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1),
-               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
-#endif
-
-       /*
-        * make sure the root and realtime inodes show up allocated
-        */
-       keep_fsinos(mp);
+       if (verbose)
+               do_log(_("        - agno = %d\n"), agno);
 
-       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+       {
                /*
                 * build up incore bno and bcnt extent btrees
                 */
@@ -1503,8 +1481,8 @@ phase5(xfs_mount_t *mp)
                init_ino_cursor(mp, agno, &ino_btree_curs,
                                &num_inos, &num_free_inos);
 
-               sb_icount += num_inos;
-               sb_ifree += num_free_inos;
+               sb_icount_ag[agno] += num_inos;
+               sb_ifree_ag[agno] += num_free_inos;
 
                num_extents = count_bno_extents_blocks(agno, &num_freeblocks);
                /*
@@ -1512,7 +1490,7 @@ phase5(xfs_mount_t *mp)
                 * are counted as allocated since the space trees
                 * always have roots
                 */
-               sb_fdblocks += num_freeblocks - 2;
+               sb_fdblocks_ag[agno] += num_freeblocks - 2;
 
                if (num_extents == 0)  {
                        /*
@@ -1554,7 +1532,7 @@ phase5(xfs_mount_t *mp)
                if (extra_blocks > 0)  {
                        do_warn(_("lost %d blocks in agno %d, sorry.\n"),
                                extra_blocks, agno);
-                       sb_fdblocks -= extra_blocks;
+                       sb_fdblocks_ag[agno] -= extra_blocks;
                }
 
                bcnt_btree_curs = bno_btree_curs;
@@ -1613,6 +1591,67 @@ phase5(xfs_mount_t *mp)
                release_agbno_extent_tree(agno);
                release_agbcnt_extent_tree(agno);
        }
+}
+
+void
+phase5(xfs_mount_t *mp)
+{
+       xfs_agnumber_t agno;
+
+       do_log(_("Phase 5 - rebuild AG headers and trees...\n"));
+
+#ifdef XR_BLD_FREE_TRACE
+       fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)
+               );
+       fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1)
+               );
+       fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n",
+               XR_INOBT_BLOCK_MAXRECS(mp, 0));
+       fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n",
+               XR_INOBT_BLOCK_MAXRECS(mp, 1));
+       fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0));
+       fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n",
+               XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1),
+               XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1));
+#endif
+       /*
+        * make sure the root and realtime inodes show up allocated
+        */
+       keep_fsinos(mp);
+
+       /* allocate per ag counters */
+       sb_icount_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t));
+       if (sb_icount_ag == NULL)
+               do_error(_("cannot alloc sb_icount_ag buffers\n"));
+
+       sb_ifree_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t));
+       if (sb_ifree_ag == NULL)
+               do_error(_("cannot alloc sb_ifree_ag buffers\n"));
+
+       sb_fdblocks_ag = calloc(mp->m_sb.sb_agcount, sizeof(__uint64_t));
+       if (sb_fdblocks_ag == NULL)
+               do_error(_("cannot alloc sb_fdblocks_ag buffers\n"));
+
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+               queue_work(phase5_function, mp, agno);
+       }
+       wait_for_workers();
+
+       /* aggregate per ag counters */
+       for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+               sb_icount += sb_icount_ag[agno];
+               sb_ifree += sb_ifree_ag[agno];
+               sb_fdblocks += sb_fdblocks_ag[agno];
+       }
+       free(sb_icount_ag);
+       free(sb_ifree_ag);
+       free(sb_fdblocks_ag);
 
        if (mp->m_sb.sb_rblocks)  {
                do_log(
@@ -1630,4 +1669,5 @@ phase5(xfs_mount_t *mp)
        sync_sb(mp);
 
        bad_ino_btree = 0;
+
 }
index 5f77a9c4e027e3e5f2454d16f19ae84580ad04b2..e4b958638e567dff309d26dfef73f6025579f0f8 100644 (file)
@@ -26,6 +26,7 @@
 #include "dinode.h"
 #include "versions.h"
 #include "prefetch.h"
+#include "threads.h"
 
 /* dinoc is a pointer to the IN-CORE dinode core */
 void
@@ -180,8 +181,9 @@ phase7_alt(xfs_mount_t *mp)
        libxfs_bcache_purge();
 
        for (i = 0; i < glob_agcount; i++)  {
-               phase7_alt_function(mp, i);
+               queue_work(phase7_alt_function, mp, i);
        }
+       wait_for_workers();
 }
 
 void
index a70c8f1593b29442904a852a6a180857fed3af1d..ce79089f4d8f5ca26e4856e0fa3dbe7da48b1173 100644 (file)
@@ -44,4 +44,6 @@ char *        err_string(int err_code);
 extern void *ts_attr_freemap(void);
 extern void *ts_dir_freemap(void);
 extern void *ts_dirbuf(void);
+extern void ts_init(void);
+extern void thread_init(void);
 
diff --git a/repair/threads.c b/repair/threads.c
new file mode 100644 (file)
index 0000000..6b51cfe
--- /dev/null
@@ -0,0 +1,308 @@
+#include <libxfs.h>
+#include "pthread.h"
+#include "signal.h"
+#include "threads.h"
+#include "err_protos.h"
+#include "protos.h"
+
+int do_parallel = 1;
+int thread_count;
+
+/* A quantum of work */
+typedef struct work_s {
+       struct work_s   *next;
+       disp_func_t     *function;
+       xfs_mount_t     *mp;
+       xfs_agnumber_t  agno;
+} work_t;
+
+typedef struct  work_queue_s {
+       work_t          *next;
+       work_t          *last;
+       int             active_threads;
+       int             work_count;
+       pthread_cond_t  mcv;    /* main thread conditional variable */
+       pthread_cond_t  wcv;    /* worker threads conditional variable */
+       pthread_mutex_t mutex;
+} work_queue_t;
+
+static work_queue_t    work_queue;
+static pthread_t       *work_threads;
+
+static void    *worker_thread(void *arg);
+
+static void
+init_workers(work_queue_t *wq, int nw)
+{
+       int                     err;
+       pthread_mutexattr_t     mtxattr;
+
+       memset(wq, 0, sizeof(work_queue_t));
+       wq->active_threads = nw;
+
+       pthread_cond_init(&wq->mcv, NULL);
+       pthread_cond_init(&wq->wcv, NULL);
+       pthread_mutexattr_init(&mtxattr);
+
+#ifdef PTHREAD_MUTEX_SPINBLOCK_NP
+       /* NP - Non Portable - Irix */
+       if ((err = pthread_mutexattr_settype(&mtxattr,
+                       PTHREAD_MUTEX_SPINBLOCK_NP)) > 0) {
+               do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+       }
+#endif
+#ifdef PTHREAD_MUTEX_FAST_NP
+       /* NP - Non Portable - Linux */
+       if ((err = pthread_mutexattr_settype(&mtxattr,
+                       PTHREAD_MUTEX_FAST_NP)) > 0) {
+               do_error(_("init_workers: thread 0x%x: pthread_mutexattr_settype error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+       }
+#endif
+       if ((err = pthread_mutex_init(&wq->mutex, &mtxattr)) > 0) {
+               do_error(_("init_workers: thread 0x%x: pthread_mutex_init error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+       }
+}
+
+static void
+quiesce_workers(work_queue_t *wq)
+{
+       int     err;
+
+       if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
+               do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+       if (wq->active_threads > 0) {
+               if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0)
+                       do_error(_("quiesce_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"),
+                               pthread_self(), err, strerror(err));
+       }
+       ASSERT(wq->active_threads == 0);
+       if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
+               do_error(_("quiesce_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+}
+
+static void
+start_workers(work_queue_t *wq, unsigned thcnt, pthread_attr_t *attrp)
+{
+       int             err;
+       unsigned long   i;
+
+       init_workers(wq, thcnt);
+
+       if ((work_threads = (pthread_t *)malloc(sizeof(pthread_t) * thcnt)) == NULL)
+               do_error(_("cannot malloc %ld bytes for work_threads array\n"), 
+                               sizeof(pthread_t) * thcnt);
+
+       /*
+       **  Create worker threads
+       */
+       for (i = 0; i < thcnt; i++) {
+               err = pthread_create(&work_threads[i], attrp, worker_thread, (void *) i);
+               if(err > 0) {
+                       do_error(_("cannot create worker threads, status = [%d] %s\n"),
+                               err, strerror(err));
+               }
+       }
+       do_log(_("        - creating %d worker thread(s)\n"), thcnt);
+
+       /*
+       **  Wait for all worker threads to initialize
+       */
+       quiesce_workers(wq);
+}
+
+void
+thread_init(void)
+{
+       int             status;
+       size_t          stacksize;
+       pthread_attr_t  attr;
+       sigset_t        blocked;
+
+       if (do_parallel == 0)
+               return;
+       if (thread_count == 0)
+               thread_count = 2 * libxfs_nproc();
+
+       if ((status = pthread_attr_init(&attr)) != 0)
+               do_error(_("status from pthread_attr_init: %d"),status);
+
+       if ((status = pthread_attr_getstacksize(&attr, &stacksize)) != 0)
+               do_error(_("status from pthread_attr_getstacksize: %d"), status);
+
+       stacksize *= 4;
+
+       if ((status = pthread_attr_setstacksize(&attr, stacksize)) != 0)
+               do_error(_("status from pthread_attr_setstacksize: %d"), status);
+
+       if ((status = pthread_setconcurrency(thread_count)) != 0)
+               do_error(_("Status from pthread_setconcurrency(%d): %d"), thread_count, status);
+
+       /*
+        *  block delivery of progress report signal to all threads
+         */
+       sigemptyset(&blocked);
+       sigaddset(&blocked, SIGHUP);
+       sigaddset(&blocked, SIGALRM);
+       pthread_sigmask(SIG_BLOCK, &blocked, NULL);
+
+       start_workers(&work_queue, thread_count, &attr);
+}
+
+/*
+ * Dequeue from the head of the list.
+ * wq->mutex held.
+ */
+static work_t *
+dequeue(work_queue_t *wq)
+{
+       work_t  *wp;
+
+       ASSERT(wq->work_count > 0);
+       wp = wq->next;
+       wq->next = wp->next;
+       wq->work_count--;
+       if (wq->next == NULL) {
+               ASSERT(wq->work_count == 0);
+               wq->last = NULL;
+       }
+       wp->next = NULL;
+       return (wp);
+}
+
+static void *
+worker_thread(void *arg)
+{
+       work_queue_t    *wq;
+       work_t          *wp;
+       int             err;
+       unsigned long   myid;
+
+       wq = &work_queue;
+       myid = (unsigned long) arg;
+       ts_init();
+       libxfs_lio_allocate();
+
+       /*
+        * Loop pulling work from the global work queue.
+        * Check for notification to exit after every chunk of work.
+        */
+       while (1) {
+               if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
+                       do_error(_("work_thread%d: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
+                               myid, pthread_self(), err, strerror(err));
+               /*
+                * Wait for work.
+                */
+               while (wq->next == NULL) {
+                       ASSERT(wq->work_count == 0);
+                       /*
+                        * Last thread going to idle sleep must wakeup
+                        * the master thread.  Same mutex is used to lock
+                        * around two different condition variables.
+                        */
+                       wq->active_threads--;
+                       ASSERT(wq->active_threads >= 0);
+                       if (!wq->active_threads) {
+                               if ((err = pthread_cond_signal(&wq->mcv)) > 0)
+                                       do_error(_("work_thread%d: thread 0x%x: pthread_cond_signal error %d: %s\n"),
+                                               myid, pthread_self(), err, strerror(err));
+                       }
+                       if ((err = pthread_cond_wait(&wq->wcv, &wq->mutex)) > 0)
+                               do_error(_("work_thread%d: thread 0x%x: pthread_cond_wait error %d: %s\n"),
+                                       myid, pthread_self(), err, strerror(err));
+                       wq->active_threads++;
+               }
+               /*
+                *  Dequeue work from the head of the list.
+                */
+               ASSERT(wq->work_count > 0);
+               wp = dequeue(wq);
+               if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
+                       do_error(_("work_thread%d: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
+                               myid, pthread_self(), err, strerror(err));
+               /*
+                *  Do the work.
+                */
+               (wp->function)(wp->mp, wp->agno);
+
+               free(wp);
+       }
+       /* NOT REACHED */
+       pthread_exit(NULL);
+       return (NULL);
+}
+
+int
+queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+       work_queue_t *wq;
+       work_t  *wp;
+
+       if (do_parallel == 0) {
+               func(mp, agno);
+               return 0;
+       }
+       wq = &work_queue;
+       /*
+        * Get memory for a new work structure.
+        */
+       if ((wp = (work_t *)memalign(8, sizeof(work_t))) == NULL)
+               return (ENOMEM);
+       /*
+        * Initialize the new work structure.
+        */
+       wp->function = func;
+       wp->mp = mp;
+       wp->agno = agno;
+
+       /*
+        *  Now queue the new work structure to the work queue.
+        */
+       if (wq->next == NULL) {
+               wq->next = wp;
+       } else {
+               wq->last->next = wp;
+       }
+       wq->last = wp;
+       wp->next = NULL;
+       wq->work_count++;
+
+       return (0);
+}
+
+void
+wait_for_workers(void)
+{
+       int             err;
+       work_queue_t    *wq;
+
+       if (do_parallel == 0)
+               return;
+       wq = &work_queue;
+       if ((err = pthread_mutex_lock(&wq->mutex)) > 0)
+               do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_lock error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+
+       ASSERT(wq->active_threads == 0);
+       if (wq->work_count > 0) {
+               /* get the workers going */
+               if ((err = pthread_cond_broadcast(&wq->wcv)) > 0)
+                       do_error(_("wait_for_workers: thread 0x%x: pthread_cond_broadcast error %d: %s\n"),
+                               pthread_self(), err, strerror(err));
+               /* and wait for them */
+               if ((err = pthread_cond_wait(&wq->mcv, &wq->mutex)) > 0)
+                       do_error(_("wait_for_workers: thread 0x%x: pthread_cond_wait error %d: %s\n"),
+                               pthread_self(), err, strerror(err));
+       }
+       ASSERT(wq->active_threads == 0);
+       ASSERT(wq->work_count == 0);
+
+       if ((err = pthread_mutex_unlock(&wq->mutex)) > 0)
+               do_error(_("wait_for_workers: thread 0x%x: pthread_mutex_unlock error %d: %s\n"),
+                       pthread_self(), err, strerror(err));
+}
diff --git a/repair/threads.h b/repair/threads.h
new file mode 100644 (file)
index 0000000..9356e55
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef        _XFS_REPAIR_THREADS_H_
+#define        _XFS_REPAIR_THREADS_H_
+
+extern int             do_parallel;
+extern int             thread_count;
+/*
+**  locking variants - rwlock/mutex
+*/
+#define PREPAIR_RW_LOCK_ATTR           PTHREAD_PROCESS_PRIVATE
+
+#define        PREPAIR_RW_LOCK_ALLOC(lkp, n)                           \
+       if (do_parallel) {                                      \
+               lkp = malloc(n*sizeof(pthread_rwlock_t));       \
+               if (lkp == NULL)                                \
+                       do_error("cannot alloc %d locks\n", n); \
+                       /* NO RETURN */                         \
+       }
+#define PREPAIR_RW_LOCK_INIT(l,a)      if (do_parallel) pthread_rwlock_init((l),(a))
+#define PREPAIR_RW_READ_LOCK(l)        if (do_parallel) pthread_rwlock_rdlock((l))
+#define PREPAIR_RW_WRITE_LOCK(l)       if (do_parallel) pthread_rwlock_wrlock((l))
+#define PREPAIR_RW_UNLOCK(l)           if (do_parallel) pthread_rwlock_unlock((l))
+#define PREPAIR_RW_WRITE_LOCK_NOTEST(l)        pthread_rwlock_wrlock((l))
+#define PREPAIR_RW_UNLOCK_NOTEST(l)    pthread_rwlock_unlock((l))
+#define PREPAIR_RW_LOCK_DELETE(l)      if (do_parallel) pthread_rwlock_destroy((l))
+
+#define PREPAIR_MTX_LOCK_INIT(m, a)    if (do_parallel) pthread_mutex_init((m), (a))
+#define PREPAIR_MTX_ATTR_INIT(a)       if (do_parallel) pthread_mutexattr_init((a))
+#define PREPAIR_MTX_ATTR_SET(a, l)     if (do_parallel) pthread_mutexattr_settype((a), l)
+#define PREPAIR_MTX_LOCK(m)            if (do_parallel) pthread_mutex_lock(m)
+#define PREPAIR_MTX_UNLOCK(m)          if (do_parallel) pthread_mutex_unlock(m)
+
+
+typedef void   disp_func_t(xfs_mount_t *mp, xfs_agnumber_t agno);
+extern int     queue_work(disp_func_t func, xfs_mount_t *mp, xfs_agnumber_t agno);
+extern void    wait_for_workers(void);
+
+#endif /* _XFS_REPAIR_THREADS_H_ */
index 20cf34718697fd3de8992a6fd0a1c5eab9db1887..398cedc3f325d18e5f6e3d987ca7f96a0fd8496d 100644 (file)
@@ -26,6 +26,7 @@
 #include "incore.h"
 #include "err_protos.h"
 #include "prefetch.h"
+#include "threads.h"
 
 #define        rounddown(x, y) (((x)/(y))*(y))
 
@@ -63,9 +64,13 @@ char *o_opts[] = {
        "pfdir",
 #define        PREFETCH_AIO_CNT        6
        "pfaio",
+#define        THREAD_CNT              7
+       "thread",
        NULL
 };
 
+static void print_runtime(unsigned);
+
 static void
 usage(void)
 {
@@ -187,7 +192,7 @@ process_args(int argc, char **argv)
         * XXX have to add suboption processing here
         * attributes, quotas, nlinks, aligned_inos, sb_fbits
         */
-       while ((c = getopt(argc, argv, "o:fl:r:LnDvVdP")) != EOF)  {
+       while ((c = getopt(argc, argv, "o:fl:r:LnDvVdPM")) != EOF)  {
                switch (c) {
                case 'D':
                        dumpcore = 1;
@@ -228,6 +233,9 @@ process_args(int argc, char **argv)
                                case PREFETCH_AIO_CNT:
                                        libxfs_lio_aio_count = (int) strtol(val, 0, 0);
                                        break;
+                               case THREAD_CNT:
+                                       thread_count = (int) strtol(val, 0, 0);
+                                       break;
                                default:
                                        unknown('o', val);
                                        break;
@@ -255,7 +263,7 @@ process_args(int argc, char **argv)
                        dangerously = 1;
                        break;
                case 'v':
-                       verbose = 1;
+                       verbose++;
                        break;
                case 'V':
                        printf(_("%s version %s\n"), progname, VERSION);
@@ -263,6 +271,9 @@ process_args(int argc, char **argv)
                case 'P':
                        do_prefetch ^= 1;
                        break;
+               case 'M':
+                       do_parallel ^= 1;
+                       break;
                case '?':
                        usage();
                }
@@ -458,7 +469,9 @@ main(int argc, char **argv)
        xfs_sb_t        *sb;
        xfs_buf_t       *sbp;
        xfs_mount_t     xfs_m;
+       time_t          t, start;
 
+       start = time(NULL);
        progname = basename(argv[0]);
        setlocale(LC_ALL, "");
        bindtextdomain(PACKAGE, LOCALEDIR);
@@ -472,6 +485,10 @@ main(int argc, char **argv)
 
        /* do phase1 to make sure we have a superblock */
        phase1(temp_mp);
+       if (verbose) {
+               t = time(NULL);
+               fprintf(stderr, asctime(localtime(&t)));
+       }
 
        if (no_modify && primary_sb_modified)  {
                do_warn(_("Primary superblock would have been modified.\n"
@@ -522,18 +539,23 @@ main(int argc, char **argv)
        }
 
        /* make sure the per-ag freespace maps are ok so we can mount the fs */
-
        phase2(mp);
+       if (verbose) {
+               t = time(NULL);
+               fprintf(stderr, asctime(localtime(&t)));
+       }
 
-       if (verbose)
-               libxfs_report(stderr);
        phase3(mp);
-       if (verbose)
-               libxfs_report(stderr);
+       if (verbose) {
+               t = time(NULL);
+               fprintf(stderr, asctime(localtime(&t)));
+       }
 
        phase4(mp);
-       if (verbose)
-               libxfs_report(stderr);
+       if (verbose) {
+               t = time(NULL);
+               fprintf(stderr, asctime(localtime(&t)));
+       }
 
        /* XXX: nathans - something in phase4 ain't playing by */
        /* the buffer cache rules.. why doesn't IRIX hit this? */
@@ -541,15 +563,26 @@ main(int argc, char **argv)
 
        if (no_modify)
                printf(_("No modify flag set, skipping phase 5\n"));
-       else
+       else {
                phase5(mp);
+               if (verbose) {
+                       t = time(NULL);
+                       fprintf(stderr, asctime(localtime(&t)));
+               }
+       }
 
        if (!bad_ino_btree)  {
                phase6(mp);
-               if (verbose)
-                       libxfs_report(stderr);
+               if (verbose) {
+                       t = time(NULL);
+                       fprintf(stderr, asctime(localtime(&t)));
+               }
 
                phase7(mp);
+               if (verbose) {
+                       t = time(NULL);
+                       fprintf(stderr, asctime(localtime(&t)));
+               }
        } else  {
                do_warn(
 _("Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"));
@@ -609,12 +642,14 @@ _("Warning:  project quota information would be cleared.\n"
                }
        }
 
-       if (verbose)
+       if (verbose > 1)
                libxfs_report(stderr);
 
        if (no_modify)  {
                do_log(
        _("No modify flag set, skipping filesystem flush and exiting.\n"));
+               if (verbose)
+                       print_runtime(t - start);
                if (fs_is_dirty)
                        return(1);
 
@@ -661,6 +696,33 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n"
        libxfs_device_close(x.ddev);
 
        do_log(_("done\n"));
+       if (verbose) {
+               print_runtime(t - start);
+       }
+       return (0);
+}
 
-       return(0);
+static void
+print_runtime(unsigned s)
+{
+       unsigned h, m;
+
+       h = s / 3600;
+       s %= 3600;
+       m = s / 60;
+       s %= 60;
+       if (h) {
+               fprintf(stderr, "Run time %d hour%s %d minute%s %d second%s\n",
+                       h, h > 1 ? "s" : "",
+                       m, m != 1 ? "s" : "",
+                       s, s != 1 ? "s" : "");
+       } else if (m) {
+               fprintf(stderr, "Run time %d minute%s %d second%s\n",
+                       m, m > 1 ? "s" : "",
+                       s, s != 1 ? "s" : "");
+       }
+       else {
+               fprintf(stderr, "Run time %d second%s\n",
+                       s, s != 1 ? "s" : "");
+       }
 }