]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
repair: translation lookups limit scalability
authorDave Chinner <dchinner@redhat.com>
Mon, 3 Mar 2014 01:16:44 +0000 (12:16 +1100)
committerDave Chinner <david@fromorbit.com>
Mon, 3 Mar 2014 01:16:44 +0000 (12:16 +1100)
A bit of perf magic showed that scalability was limits to 3-4
concurrent threads due to contention on a lock inside in something
called __dcigettext(). That some library somewhere that repair is
linked against, and it turns out to be inside the translation
infrastructure to support the _() string mechanism:

# Samples: 34K of event 'cs'
# Event count (approx.): 495567
#
# Overhead        Command      Shared Object          Symbol
# ........  .............  .................  ..............
#
    60.30%     xfs_repair  [kernel.kallsyms]  [k] __schedule
               |
               --- 0x63fffff9c
                   process_bmbt_reclist_int
                  |
                  |--39.95%-- __dcigettext
                  |          __lll_lock_wait
                  |          system_call_fastpath
                  |          SyS_futex
                  |          do_futex
                  |          futex_wait
                  |          futex_wait_queue_me
                  |          schedule
                  |          __schedule
                  |
                  |--8.91%-- __lll_lock_wait
                  |          system_call_fastpath
                  |          SyS_futex
                  |          do_futex
                  |          futex_wait
                  |          futex_wait_queue_me
                  |          schedule
                  |          __schedule
                   --51.13%-- [...]

Fix this by initialising global variables that hold the translated
strings at startup, hence avoiding the need to do repeated runtime
translation of the same strings.

Runtime of an unpatched xfs_repair is roughly:

        XFS_REPAIR Summary    Fri Dec  6 11:15:50 2013

Phase           Start           End             Duration
Phase 1:        12/06 10:56:21  12/06 10:56:21
Phase 2:        12/06 10:56:21  12/06 10:56:23  2 seconds
Phase 3:        12/06 10:56:23  12/06 11:01:31  5 minutes, 8 seconds
Phase 4:        12/06 11:01:31  12/06 11:07:08  5 minutes, 37 seconds
Phase 5:        12/06 11:07:08  12/06 11:07:09  1 second
Phase 6:        12/06 11:07:09  12/06 11:15:49  8 minutes, 40 seconds
Phase 7:        12/06 11:15:49  12/06 11:15:50  1 second

Total run time: 19 minutes, 29 seconds

Patched version:

Phase           Start           End             Duration
Phase 1:        12/06 10:36:29  12/06 10:36:29
Phase 2:        12/06 10:36:29  12/06 10:36:31  2 seconds
Phase 3:        12/06 10:36:31  12/06 10:40:08  3 minutes, 37 seconds
Phase 4:        12/06 10:40:08  12/06 10:43:42  3 minutes, 34 seconds
Phase 5:        12/06 10:43:42  12/06 10:43:42
Phase 6:        12/06 10:43:42  12/06 10:50:28  6 minutes, 46 seconds
Phase 7:        12/06 10:50:28  12/06 10:50:29  1 second

Total run time: 14 minutes

Big win!

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
repair/dinode.c
repair/dinode.h
repair/scan.c
repair/xfs_repair.c

index 3115bd0d66ff84e3aae7e550b10d53eb20db625c..4953a5623b9acc4e42e7e0db5b26b3491e05347c 100644 (file)
 #include "bmap.h"
 #include "threads.h"
 
+/*
+ * gettext lookups for translations of strings use mutexes internally to
+ * the library. Hence when we come through here doing parallel scans in
+ * multiple AGs, then all do concurrent text conversions and serialise
+ * on the translation string lookups. Let's avoid doing repeated lookups
+ * by making them static variables and only assigning the translation
+ * once.
+ */
+static char    *forkname_data;
+static char    *forkname_attr;
+static char    *ftype_real_time;
+static char    *ftype_regular;
+
+void
+dinode_bmbt_translation_init(void)
+{
+       forkname_data = _("data");
+       forkname_attr = _("attr");
+       ftype_real_time = _("real-time");
+       ftype_regular = _("regular");
+}
+
+char *
+get_forkname(int whichfork)
+{
+
+       if (whichfork == XFS_DATA_FORK)
+               return forkname_data;
+       return forkname_attr;
+}
+
 /*
  * inode clearing routines
  */
@@ -542,7 +573,7 @@ process_bmbt_reclist_int(
        xfs_dfiloff_t           op = 0;         /* prev offset */
        xfs_dfsbno_t            b;
        char                    *ftype;
-       char                    *forkname;
+       char                    *forkname = get_forkname(whichfork);
        int                     i;
        int                     state;
        xfs_agnumber_t          agno;
@@ -552,15 +583,10 @@ process_bmbt_reclist_int(
        xfs_agnumber_t          locked_agno = -1;
        int                     error = 1;
 
-       if (whichfork == XFS_DATA_FORK)
-               forkname = _("data");
-       else
-               forkname = _("attr");
-
        if (type == XR_INO_RTDATA)
-               ftype = _("real-time");
+               ftype = ftype_real_time;
        else
-               ftype = _("regular");
+               ftype = ftype_regular;
 
        for (i = 0; i < *numrecs; i++) {
                libxfs_bmbt_disk_get_all(rp + i, &irec);
@@ -1110,7 +1136,7 @@ process_btinode(
        xfs_ino_t               lino;
        xfs_bmbt_ptr_t          *pp;
        xfs_bmbt_key_t          *pkey;
-       char                    *forkname;
+       char                    *forkname = get_forkname(whichfork);
        int                     i;
        int                     level;
        int                     numrecs;
@@ -1122,11 +1148,6 @@ process_btinode(
        *tot = 0;
        *nex = 0;
 
-       if (whichfork == XFS_DATA_FORK)
-               forkname = _("data");
-       else
-               forkname = _("attr");
-
        magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_BMAP_CRC_MAGIC
                                                 : XFS_BMAP_MAGIC;
 
index d9197c1da6bd178867a3e302870d6d404fb6ca20..7521521172716d0c57fef91b1964bdcc1b3d2d60 100644 (file)
@@ -127,4 +127,7 @@ get_bmapi(xfs_mount_t               *mp,
                xfs_dfiloff_t   bno,
                int             whichfork );
 
+void dinode_bmbt_translation_init(void);
+char * get_forkname(int whichfork);
+
 #endif /* _XR_DINODE_H */
index 73b458178b8d59133a62fbe95818eefb156e954b..b12f48bae5cd3296a51dfe71f6787bd9720b668a 100644 (file)
@@ -171,17 +171,12 @@ scan_bmapbt(
        xfs_bmbt_rec_t          *rp;
        xfs_dfiloff_t           first_key;
        xfs_dfiloff_t           last_key;
-       char                    *forkname;
+       char                    *forkname = get_forkname(whichfork);
        int                     numrecs;
        xfs_agnumber_t          agno;
        xfs_agblock_t           agbno;
        int                     state;
 
-       if (whichfork == XFS_DATA_FORK)
-               forkname = _("data");
-       else
-               forkname = _("attr");
-
        /*
         * unlike the ag freeblock btrees, if anything looks wrong
         * in an inode bmap tree, just bail.  it's possible that
index 9e0502a73acf038667f75436d379804f6ddde7d3..bac334f8160f9dd77ad51a7222160dcd29904cca 100644 (file)
@@ -29,6 +29,7 @@
 #include "prefetch.h"
 #include "threads.h"
 #include "progress.h"
+#include "dinode.h"
 
 #define        rounddown(x, y) (((x)/(y))*(y))
 
@@ -533,6 +534,7 @@ main(int argc, char **argv)
        setlocale(LC_ALL, "");
        bindtextdomain(PACKAGE, LOCALEDIR);
        textdomain(PACKAGE);
+       dinode_bmbt_translation_init();
 
        temp_mp = &xfs_m;
        setbuf(stdout, NULL);