git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blobdiff - repair/dino_chunks.c
libxfs: refactor manage_zones()
[thirdparty/xfsprogs-dev.git] / repair / dino_chunks.c
index 0b16db66952d14452d238d92cf2cfe4049eef416..3b1890b18ad8475c961388d7cc5381232c53ec0f 100644 (file)
@@ -1,41 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#include <libxfs.h>
+#include "libxfs.h"
 #include "avl.h"
 #include "globals.h"
 #include "agheader.h"
 #include "incore.h"
 #include "protos.h"
 #include "err_protos.h"
-#include "dir.h"
 #include "dinode.h"
-#include "prefetch.h"
-#include "threads.h"
 #include "versions.h"
+#include "prefetch.h"
+#include "progress.h"
 
 /*
  * validates inode block or chunk, returns # of good inodes
  * the dinodes are verified using verify_uncertain_dinode() which
  * means only the basic inode info is checked, no fork checks.
  */
-
-int
+static int
 check_aginode_block(xfs_mount_t        *mp,
                        xfs_agnumber_t  agno,
                        xfs_agblock_t   agbno)
@@ -54,32 +40,26 @@ check_aginode_block(xfs_mount_t     *mp,
         * so no one else will overlap them.
         */
        bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
-                       XFS_FSB_TO_BB(mp, 1), 0);
+                       XFS_FSB_TO_BB(mp, 1), 0, NULL);
        if (!bp) {
-               do_warn(_("cannot read agbno (%u/%u), disk block %lld\n"), agno,
-                       agbno, (xfs_daddr_t)XFS_AGB_TO_DADDR(mp, agno, agbno));
+               do_warn(_("cannot read agbno (%u/%u), disk block %" PRId64 "\n"),
+                       agno, agbno, XFS_AGB_TO_DADDR(mp, agno, agbno));
                return(0);
        }
 
        for (i = 0; i < mp->m_sb.sb_inopblock; i++)  {
-               dino_p = XFS_MAKE_IPTR(mp, bp, i);
+               dino_p = xfs_make_iptr(mp, bp, i);
                if (!verify_uncertain_dinode(mp, dino_p, agno,
                                XFS_OFFBNO_TO_AGINO(mp, agbno, i)))
                        cnt++;
        }
+       if (cnt)
+               bp->b_ops = &xfs_inode_buf_ops;
 
        libxfs_putbuf(bp);
        return(cnt);
 }
 
-int
-check_inode_block(xfs_mount_t          *mp,
-                       xfs_ino_t       ino)
-{
-       return(check_aginode_block(mp, XFS_INO_TO_AGNO(mp, ino),
-                                       XFS_INO_TO_AGBNO(mp, ino)));
-}
-
 /*
  * tries to establish if the inode really exists in a valid
  * inode chunk.  returns number of new inodes if things are good
@@ -93,7 +73,7 @@ check_inode_block(xfs_mount_t         *mp,
  * routines called by check_uncertain_aginodes() and
  * process_uncertain_aginodes().
  */
-int
+static int
 verify_inode_chunk(xfs_mount_t         *mp,
                        xfs_ino_t       ino,
                        xfs_ino_t       *start_ino)
@@ -118,17 +98,18 @@ verify_inode_chunk(xfs_mount_t             *mp,
        int             i;
        int             j;
        int             state;
+       xfs_extlen_t    blen;
 
        agno = XFS_INO_TO_AGNO(mp, ino);
        agino = XFS_INO_TO_AGINO(mp, ino);
        agbno = XFS_INO_TO_AGBNO(mp, ino);
        *start_ino = NULLFSINO;
 
-       ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+       ASSERT(mp->m_ialloc_blks > 0);
 
        if (agno == mp->m_sb.sb_agcount - 1)
                max_agbno = mp->m_sb.sb_dblocks -
-                       (xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno;
+                       (xfs_rfsblock_t) mp->m_sb.sb_agblocks * agno;
        else
                max_agbno = mp->m_sb.sb_agblocks;
 
@@ -142,26 +123,25 @@ verify_inode_chunk(xfs_mount_t            *mp,
         * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
         * (multiple chunks per block)
         */
-       if (XFS_IALLOC_BLOCKS(mp) == 1)  {
+       if (mp->m_ialloc_blks == 1)  {
                if (agbno > max_agbno)
-                       return(0);
+                       return 0;
+               if (check_aginode_block(mp, agno, agbno) == 0)
+                       return 0;
 
-               if (check_inode_block(mp, ino) == 0)
-                       return(0);
+               pthread_mutex_lock(&ag_locks[agno].lock);
 
-               PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
-               switch (state = get_agbno_state(mp, agno, agbno))  {
+               state = get_bmap(agno, agbno);
+               switch (state) {
                case XR_E_INO:
                        do_warn(
                _("uncertain inode block %d/%d already known\n"),
                                agno, agbno);
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
                        break;
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
-                       set_agbno_state(mp, agno, agbno, XR_E_INO);
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+                       set_bmap(agno, agbno, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
@@ -173,28 +153,29 @@ verify_inode_chunk(xfs_mount_t            *mp,
                        do_warn(
                _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, agbno, state);
-                       set_agbno_state(mp, agno, agbno, XR_E_MULT);
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+                       set_bmap(agno, agbno, XR_E_MULT);
+                       pthread_mutex_unlock(&ag_locks[agno].lock);
                        return(0);
                default:
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, agbno, state);
-                       set_agbno_state(mp, agno, agbno, XR_E_INO);
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+                       set_bmap(agno, agbno, XR_E_INO);
                        break;
                }
 
-               start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
+               pthread_mutex_unlock(&ag_locks[agno].lock);
+
+               start_agino = XFS_AGB_TO_AGINO(mp, agbno);
                *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
 
                /*
                 * put new inode record(s) into inode tree
                 */
                for (j = 0; j < chunks_pblock; j++)  {
-                       if ((irec_p = find_inode_rec(agno, start_agino))
+                       if ((irec_p = find_inode_rec(mp, agno, start_agino))
                                        == NULL)  {
-                               irec_p = set_inode_free_alloc(agno,
+                               irec_p = set_inode_free_alloc(mp, agno,
                                                        start_agino);
                                for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                                        set_inode_free(irec_p, i);
@@ -215,7 +196,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                 */
                start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
                                        fs_ino_alignment);
-               end_agbno = start_agbno + XFS_IALLOC_BLOCKS(mp);
+               end_agbno = start_agbno + mp->m_ialloc_blks;
 
                /*
                 * if this fs has aligned inodes but the end of the
@@ -246,14 +227,14 @@ verify_inode_chunk(xfs_mount_t            *mp,
                 * ok, put the record into the tree, if no conflict.
                 */
                if (find_uncertain_inode_rec(agno,
-                               XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0)))
+                               XFS_AGB_TO_AGINO(mp, start_agbno)))
                        return(0);
 
-               start_agino = XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0);
+               start_agino = XFS_AGB_TO_AGINO(mp, start_agbno);
                *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
 
-               irec_p = set_inode_free_alloc(agno,
-                               XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0));
+               irec_p = set_inode_free_alloc(mp, agno,
+                               XFS_AGB_TO_AGINO(mp, start_agbno));
 
                for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                        set_inode_free(irec_p, i);
@@ -273,14 +254,14 @@ verify_inode_chunk(xfs_mount_t            *mp,
         * a discovered inode chunk completely within that range
         * would include the inode passed into us.
         */
-       if (XFS_IALLOC_BLOCKS(mp) > 1)  {
-               if (agino > XFS_IALLOC_INODES(mp))
-                       start_agbno = agbno - XFS_IALLOC_BLOCKS(mp) + 1;
+       if (mp->m_ialloc_blks > 1)  {
+               if (agino > mp->m_ialloc_inos)
+                       start_agbno = agbno - mp->m_ialloc_blks + 1;
                else
                        start_agbno = 1;
        }
 
-       end_agbno = agbno + XFS_IALLOC_BLOCKS(mp);
+       end_agbno = agbno + mp->m_ialloc_blks;
 
        if (end_agbno > max_agbno)
                end_agbno = max_agbno;
@@ -290,7 +271,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
         */
        irec_before_p = irec_after_p = NULL;
 
-       find_inode_rec_range(agno, XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0),
+       find_inode_rec_range(mp, agno, XFS_AGB_TO_AGINO(mp, start_agbno),
                XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
                &irec_before_p, &irec_after_p);
 
@@ -335,7 +316,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
 
                        start_agbno = XFS_AGINO_TO_AGBNO(mp,
                                                irec_p->ino_startnum) +
-                                               XFS_IALLOC_BLOCKS(mp);
+                                               mp->m_ialloc_blks;
 
                        /*
                         * we know that the inode we're trying to verify isn't
@@ -343,7 +324,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
                         * of the gap -- is it within the search range?
                         */
                        if (irec_next_p != NULL &&
-                                       agino + XFS_IALLOC_INODES(mp) >=
+                                       agino + mp->m_ialloc_inos >=
                                                irec_next_p->ino_startnum)
                                end_agbno = XFS_AGINO_TO_AGBNO(mp,
                                                irec_next_p->ino_startnum);
@@ -358,7 +339,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
         * the inode in question and that the space between them
         * is too small for a legal inode chunk
         */
-       if (end_agbno - start_agbno < XFS_IALLOC_BLOCKS(mp))
+       if (end_agbno - start_agbno < mp->m_ialloc_blks)
                return(0);
 
        /*
@@ -402,8 +383,8 @@ verify_inode_chunk(xfs_mount_t              *mp,
 
        num_blks = chunk_stop_agbno - chunk_start_agbno;
 
-       if (num_blks < XFS_IALLOC_BLOCKS(mp) || ino_cnt == 0)
-               return(0);
+       if (num_blks < mp->m_ialloc_blks || ino_cnt == 0)
+               return 0;
 
        /*
         * XXX - later - if the entire range is selected and they're all
@@ -418,8 +399,8 @@ verify_inode_chunk(xfs_mount_t              *mp,
         * the chunk
         */
 
-       if (num_blks % XFS_IALLOC_BLOCKS(mp) != 0)  {
-               num_blks = rounddown(num_blks, XFS_IALLOC_BLOCKS(mp));
+       if (num_blks % mp->m_ialloc_blks != 0)  {
+               num_blks = rounddown(num_blks, mp->m_ialloc_blks);
                chunk_stop_agbno = chunk_start_agbno + num_blks;
        }
 
@@ -431,35 +412,32 @@ verify_inode_chunk(xfs_mount_t            *mp,
         * user data -- we're probably here as a result of a directory
         * entry or an iunlinked pointer
         */
-       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
-       for (j = 0, cur_agbno = chunk_start_agbno;
-                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
-               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+       pthread_mutex_lock(&ag_locks[agno].lock);
+       for (cur_agbno = chunk_start_agbno;
+            cur_agbno < chunk_stop_agbno;
+            cur_agbno += blen)  {
+               state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
+               switch (state) {
                case XR_E_MULT:
                case XR_E_INUSE:
                case XR_E_INUSE_FS:
                case XR_E_FS_MAP:
                        do_warn(
-               _("inode block %d/%d multiply claimed, (state %d)\n"),
+       _("inode block %d/%d multiply claimed, (state %d)\n"),
                                agno, cur_agbno, state);
-                       set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
-                       j = 1;
-                       break;
+                       set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
+                       pthread_mutex_unlock(&ag_locks[agno].lock);
+                       return 0;
                case XR_E_INO:
                        do_error(
-               _("uncertain inode block overlap, agbno = %d, ino = %llu\n"),
+       _("uncertain inode block overlap, agbno = %d, ino = %" PRIu64 "\n"),
                                agbno, ino);
                        break;
                default:
                        break;
                }
-
-               if (j) {
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
-                       return(0);
-               }
        }
-       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+       pthread_mutex_unlock(&ag_locks[agno].lock);
 
        /*
         * ok, chunk is good.  put the record into the tree if required,
@@ -468,12 +446,12 @@ verify_inode_chunk(xfs_mount_t            *mp,
         * ok because we'll override the free setting later if the
         * contents of the inode indicate it's in use.
         */
-       start_agino = XFS_OFFBNO_TO_AGINO(mp, chunk_start_agbno, 0);
+       start_agino = XFS_AGB_TO_AGINO(mp, chunk_start_agbno);
        *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
 
-       ASSERT(find_inode_rec(agno, start_agino) == NULL);
+       ASSERT(find_inode_rec(mp, agno, start_agino) == NULL);
 
-       irec_p = set_inode_free_alloc(agno, start_agino);
+       irec_p = set_inode_free_alloc(mp, agno, start_agino);
        for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
                set_inode_free(irec_p, i);
 
@@ -482,19 +460,22 @@ verify_inode_chunk(xfs_mount_t            *mp,
 
        set_inode_used(irec_p, agino - start_agino);
 
-       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
+       pthread_mutex_lock(&ag_locks[agno].lock);
+
        for (cur_agbno = chunk_start_agbno;
-                       cur_agbno < chunk_stop_agbno; cur_agbno++)  {
-               switch (state = get_agbno_state(mp, agno, cur_agbno))  {
+            cur_agbno < chunk_stop_agbno;
+            cur_agbno += blen)  {
+               state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
+               switch (state) {
                case XR_E_INO:
                        do_error(
-               _("uncertain inode block %llu already known\n"),
+               _("uncertain inode block %" PRIu64 " already known\n"),
                                XFS_AGB_TO_FSB(mp, agno, cur_agbno));
                        break;
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
-                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+                       set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
                        break;
                case XR_E_MULT:
                case XR_E_INUSE:
@@ -508,11 +489,11 @@ verify_inode_chunk(xfs_mount_t            *mp,
                        do_warn(
                _("inode block %d/%d bad state, (state %d)\n"),
                                agno, cur_agbno, state);
-                       set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+                       set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
                        break;
                }
        }
-       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+       pthread_mutex_unlock(&ag_locks[agno].lock);
 
        return(ino_cnt);
 }
@@ -520,7 +501,7 @@ verify_inode_chunk(xfs_mount_t              *mp,
 /*
  * same as above only for ag inode chunks
  */
-int
+static int
 verify_aginode_chunk(xfs_mount_t       *mp,
                        xfs_agnumber_t  agno,
                        xfs_agino_t     agino,
@@ -543,7 +524,7 @@ verify_aginode_chunk(xfs_mount_t    *mp,
  * this does the same as the two above only it returns a pointer
  * to the inode record in the good inode tree
  */
-ino_tree_node_t *
+static ino_tree_node_t *
 verify_aginode_chunk_irec(xfs_mount_t  *mp,
                        xfs_agnumber_t  agno,
                        xfs_agino_t     agino)
@@ -552,12 +533,47 @@ verify_aginode_chunk_irec(xfs_mount_t     *mp,
        ino_tree_node_t *irec = NULL;
 
        if (verify_aginode_chunk(mp, agno, agino, &start_agino))
-               irec = find_inode_rec(agno, start_agino);
+               irec = find_inode_rec(mp, agno, start_agino);
 
        return(irec);
 }
 
+/*
+ * Mark block agno/agbno as an inode block in the block map, holding
+ * ag_locks[agno].lock across the read-modify-write. XR_E_INO is left alone;
+ * unknown/free states become XR_E_INO; XR_E_BAD_STATE is reported through
+ * do_error(); anything else becomes XR_E_MULT and draws a warning.
+ */
+static void
+process_inode_agbno_state(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno)
+{
+       int state;      /* block map state read under the AG lock */
 
+       pthread_mutex_lock(&ag_locks[agno].lock);
+       state = get_bmap(agno, agbno);
+       switch (state) {
+       case XR_E_INO:  /* already marked */
+               break;
+       case XR_E_UNKNOWN:
+       case XR_E_FREE:
+       case XR_E_FREE1:
+               set_bmap(agno, agbno, XR_E_INO);        /* now an inode block */
+               break;
+       case XR_E_BAD_STATE:
+               do_error(_("bad state in block map %d\n"), state);
+               break;
+       default:        /* any other state: flag as multiply claimed */
+               set_bmap(agno, agbno, XR_E_MULT);
+               do_warn(
+       _("inode block %" PRIu64 " multiply claimed, state was %d\n"),
+                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
+               break;
+       }
+       pthread_mutex_unlock(&ag_locks[agno].lock);
+}
 
 /*
  * processes an inode allocation chunk/block, returns 1 on I/O errors,
@@ -565,35 +581,49 @@ verify_aginode_chunk_irec(xfs_mount_t     *mp,
  *
  * *bogus is set to 1 if the entire set of inodes is bad.
  */
-/* ARGSUSED */
-int
-process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
-                       ino_tree_node_t *first_irec, int ino_discovery,
-                       int check_dups, int extra_attr_check, int *bogus)
+static int
+process_inode_chunk(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       int                     num_inos,
+       ino_tree_node_t         *first_irec,
+       int                     ino_discovery,
+       int                     check_dups,
+       int                     extra_attr_check,
+       int                     *bogus)
 {
        xfs_ino_t               parent;
        ino_tree_node_t         *ino_rec;
-       xfs_buf_t               *bp;
+       xfs_buf_t               **bplist;
        xfs_dinode_t            *dino;
        int                     icnt;
        int                     status;
        int                     is_used;
-       int                     state;
-       int                     done;
        int                     ino_dirty;
        int                     irec_offset;
        int                     ibuf_offset;
        xfs_agino_t             agino;
        xfs_agblock_t           agbno;
+       xfs_ino_t               ino;
        int                     dirty = 0;
-       int                     cleared = 0;
        int                     isa_dir = 0;
+       int                     blks_per_cluster;
+       int                     cluster_count;
+       int                     bp_index;
+       int                     cluster_offset;
 
        ASSERT(first_irec != NULL);
        ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
 
        *bogus = 0;
-       ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+       ASSERT(mp->m_ialloc_blks > 0);
+
+       blks_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
+       if (blks_per_cluster == 0)
+               blks_per_cluster = 1;
+       cluster_count = XFS_INODES_PER_CHUNK / inodes_per_cluster;
+       if (cluster_count == 0)
+               cluster_count = 1;
 
        /*
         * get all blocks required to read in this chunk (may wind up
@@ -601,64 +631,109 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
         */
        agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
 
-       bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
-                       XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
-       if (!bp) {
-               do_warn(_("cannot read inode %llu, disk block %lld, cnt %d\n"),
-                       XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
-                       XFS_AGB_TO_DADDR(mp, agno, agbno),
-                       (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
-               return(1);
-       }
-
        /*
         * set up first irec
         */
        ino_rec = first_irec;
+       irec_offset = 0;
+
+       bplist = malloc(cluster_count * sizeof(xfs_buf_t *));
+       if (bplist == NULL)
+               do_error(_("failed to allocate %zd bytes of memory\n"),
+                       cluster_count * sizeof(xfs_buf_t *));
+
+       for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+               /*
+                * Skip the cluster buffer if the first inode is sparse. The
+                * remaining inodes in the cluster share the same state as
+                * sparse inodes occur at cluster granularity.
+                */
+               if (is_inode_sparse(ino_rec, irec_offset)) {
+                       pftrace("skip sparse inode, startnum 0x%x idx %d",
+                               ino_rec->ino_startnum, irec_offset);
+                       bplist[bp_index] = NULL;
+                       goto next_readbuf;
+               }
+
+               pftrace("about to read off %llu in AG %d",
+                       XFS_AGB_TO_DADDR(mp, agno, agbno), agno);
+
+               bplist[bp_index] = libxfs_readbuf(mp->m_dev,
+                                       XFS_AGB_TO_DADDR(mp, agno, agbno),
+                                       XFS_FSB_TO_BB(mp, blks_per_cluster), 0,
+                                       &xfs_inode_buf_ops);
+               if (!bplist[bp_index]) {
+                       do_warn(_("cannot read inode %" PRIu64 ", disk block %" PRId64 ", cnt %d\n"),
+                               XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
+                               XFS_AGB_TO_DADDR(mp, agno, agbno),
+                               XFS_FSB_TO_BB(mp, blks_per_cluster));
+                       while (bp_index > 0) {
+                               bp_index--;
+                               libxfs_putbuf(bplist[bp_index]);
+                       }
+                       free(bplist);
+                       return(1);
+               }
+
+               pftrace("readbuf %p (%llu, %d) in AG %d", bplist[bp_index],
+                       (long long)XFS_BUF_ADDR(bplist[bp_index]),
+                       bplist[bp_index]->b_bcount, agno);
+
+               bplist[bp_index]->b_ops = &xfs_inode_buf_ops;
+
+next_readbuf:
+               irec_offset += mp->m_sb.sb_inopblock * blks_per_cluster;
+               agbno += blks_per_cluster;
+       }
+       agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
+
        /*
         * initialize counters
         */
        irec_offset = 0;
        ibuf_offset = 0;
+       cluster_offset = 0;
        icnt = 0;
        status = 0;
-       done = 0;
+       bp_index = 0;
 
        /*
         * verify inode chunk if necessary
         */
        if (ino_discovery)  {
-               while (!done)  {
-                       /*
-                        * make inode pointer
-                        */
-                       dino = XFS_MAKE_IPTR(mp, bp, icnt);
+               for (;;)  {
                        agino = irec_offset + ino_rec->ino_startnum;
 
-                       /*
-                        * we always think that the root and realtime
-                        * inodes are verified even though we may have
-                        * to reset them later to keep from losing the
-                        * chunk that they're in
-                        */
-                       if (verify_dinode(mp, dino, agno, agino) == 0 ||
-                                       (agno == 0 &&
-                                       (mp->m_sb.sb_rootino == agino ||
-                                        mp->m_sb.sb_rsumino == agino ||
-                                        mp->m_sb.sb_rbmino == agino)))
-                               status++;
+                       /* no buffers for sparse clusters */
+                       if (bplist[bp_index]) {
+                               /* make inode pointer */
+                               dino = xfs_make_iptr(mp, bplist[bp_index],
+                                                    cluster_offset);
+
+                               /*
+                                * we always think that the root and realtime
+                                * inodes are verified even though we may have
+                                * to reset them later to keep from losing the
+                                * chunk that they're in
+                                */
+                               if (verify_dinode(mp, dino, agno, agino) == 0 ||
+                                               (agno == 0 &&
+                                               (mp->m_sb.sb_rootino == agino ||
+                                                mp->m_sb.sb_rsumino == agino ||
+                                                mp->m_sb.sb_rbmino == agino)))
+                                       status++;
+                       }
 
                        irec_offset++;
                        icnt++;
+                       cluster_offset++;
 
-                       if (icnt == XFS_IALLOC_INODES(mp) &&
+                       if (icnt == mp->m_ialloc_inos &&
                                        irec_offset == XFS_INODES_PER_CHUNK)  {
                                /*
                                 * done! - finished up irec and block
                                 * simultaneously
                                 */
-                               libxfs_putbuf(bp);
-                               done = 1;
                                break;
                        } else if (irec_offset == XFS_INODES_PER_CHUNK)  {
                                /*
@@ -668,6 +743,10 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                                ASSERT(ino_rec->ino_startnum == agino + 1);
                                irec_offset = 0;
                        }
+                       if (cluster_offset == inodes_per_cluster) {
+                               bp_index++;
+                               cluster_offset = 0;
+                       }
                }
 
                /*
@@ -676,8 +755,10 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                 */
                if (!status)  {
                        *bogus = 1;
-                       if (!done) /* already free'd */
-                         libxfs_putbuf(bp);
+                       for (bp_index = 0; bp_index < cluster_count; bp_index++)
+                               if (bplist[bp_index])
+                                       libxfs_putbuf(bplist[bp_index]);
+                       free(bplist);
                        return(0);
                }
 
@@ -687,57 +768,28 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                ino_rec = first_irec;
 
                irec_offset = 0;
-               ibuf_offset = 0;
+               cluster_offset = 0;
+               bp_index = 0;
                icnt = 0;
                status = 0;
-               done = 0;
-
-               /* nathans TODO ... memory leak here?: */
-
-               /*
-                * get first block
-                */
-               bp = libxfs_readbuf(mp->m_dev,
-                               XFS_AGB_TO_DADDR(mp, agno, agbno),
-                               XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
-               if (!bp) {
-                       do_warn(_("can't read inode %llu, disk block %lld, "
-                               "cnt %d\n"), XFS_AGINO_TO_INO(mp, agno, agino),
-                               XFS_AGB_TO_DADDR(mp, agno, agbno),
-                               (int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
-                       return(1);
-               }
        }
 
        /*
         * mark block as an inode block in the incore bitmap
         */
-       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
-       switch (state = get_agbno_state(mp, agno, agbno))  {
-       case XR_E_INO:  /* already marked */
-               break;
-       case XR_E_UNKNOWN:
-       case XR_E_FREE:
-       case XR_E_FREE1:
-               set_agbno_state(mp, agno, agbno, XR_E_INO);
-               break;
-       case XR_E_BAD_STATE:
-               do_error(_("bad state in block map %d\n"), state);
-               break;
-       default:
-               set_agbno_state(mp, agno, agbno, XR_E_MULT);
-               do_warn(_("inode block %llu multiply claimed, state was %d\n"),
-                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
-               break;
-       }
-       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
+       if (!is_inode_sparse(ino_rec, irec_offset))
+               process_inode_agbno_state(mp, agno, agbno);
 
-       while (!done)  {
-               /*
-                * make inode pointer
-                */
-               dino = XFS_MAKE_IPTR(mp, bp, icnt);
+       for (;;) {
                agino = irec_offset + ino_rec->ino_startnum;
+               ino = XFS_AGINO_TO_INO(mp, agno, agino);
+
+               if (is_inode_sparse(ino_rec, irec_offset))
+                       goto process_next;
+
+               /* make inode pointer */
+               dino = xfs_make_iptr(mp, bplist[bp_index], cluster_offset);
+
 
                is_used = 3;
                ino_dirty = 0;
@@ -745,13 +797,15 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
 
                status = process_dinode(mp, dino, agno, agino,
                                is_inode_free(ino_rec, irec_offset),
-                               &ino_dirty, &cleared, &is_used,
-                               ino_discovery, check_dups,
+                               &ino_dirty, &is_used,ino_discovery, check_dups,
                                extra_attr_check, &isa_dir, &parent);
 
                ASSERT(is_used != 3);
-               if (ino_dirty)
+               if (ino_dirty) {
                        dirty = 1;
+                       libxfs_dinode_calc_crc(mp, dino);
+               }
+
                /*
                 * XXX - if we want to try and keep
                 * track of whether we need to bang on
@@ -761,23 +815,34 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                 */
                if (is_used)  {
                        if (is_inode_free(ino_rec, irec_offset))  {
-                               if (verbose || no_modify ||
-                                   XFS_AGINO_TO_INO(mp, agno, agino) !=
-                                                       old_orphanage_ino)  {
-                                       do_warn(_("imap claims in-use inode "
-                                                 "%llu is free, "),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                               if (verbose || no_modify)  {
+                                       do_warn(
+       _("imap claims in-use inode %" PRIu64 " is free, "),
+                                               ino);
                                }
 
-                               if (verbose || (!no_modify &&
-                                   XFS_AGINO_TO_INO(mp, agno, agino) !=
-                                               old_orphanage_ino))
+                               if (verbose || !no_modify)
                                        do_warn(_("correcting imap\n"));
                                else
                                        do_warn(_("would correct imap\n"));
                        }
                        set_inode_used(ino_rec, irec_offset);
+
+                       /*
+                        * store the on-disk file type for comparing in
+                        * phase 6.
+                        */
+                       set_inode_ftype(ino_rec, irec_offset,
+                               libxfs_mode_to_ftype(be16_to_cpu(dino->di_mode)));
+
+                       /*
+                        * store on-disk nlink count for comparing in phase 7
+                        */
+                       set_inode_disk_nlinks(ino_rec, irec_offset,
+                               dino->di_version > 1
+                                       ? be32_to_cpu(dino->di_nlink)
+                                       : be16_to_cpu(dino->di_onlink));
+
                } else  {
                        set_inode_free(ino_rec, irec_offset);
                }
@@ -807,73 +872,77 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                }
 
                if (status)  {
-                       if (mp->m_sb.sb_rootino ==
-                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                       if (mp->m_sb.sb_rootino == ino) {
                                need_root_inode = 1;
 
                                if (!no_modify)  {
-                                       do_warn(_("cleared root inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("cleared root inode %" PRIu64 "\n"),
+                                               ino);
                                } else  {
-                                       do_warn(_("would clear root inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("would clear root inode %" PRIu64 "\n"),
+                                               ino);
                                }
-                       } else if (mp->m_sb.sb_rbmino ==
-                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                       } else if (mp->m_sb.sb_rbmino == ino) {
                                need_rbmino = 1;
 
                                if (!no_modify)  {
-                                       do_warn(_("cleared realtime bitmap "
-                                                 "inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("cleared realtime bitmap inode %" PRIu64 "\n"),
+                                               ino);
                                } else  {
-                                       do_warn(_("would clear realtime bitmap "
-                                                 "inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("would clear realtime bitmap inode %" PRIu64 "\n"),
+                                               ino);
                                }
-                       } else if (mp->m_sb.sb_rsumino ==
-                                       XFS_AGINO_TO_INO(mp, agno, agino))  {
+                       } else if (mp->m_sb.sb_rsumino == ino) {
                                need_rsumino = 1;
 
                                if (!no_modify)  {
-                                       do_warn(_("cleared realtime summary "
-                                                 "inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("cleared realtime summary inode %" PRIu64 "\n"),
+                                               ino);
                                } else  {
-                                       do_warn(_("would clear realtime summary"
-                                                 " inode %llu\n"),
-                                               XFS_AGINO_TO_INO(mp, agno,
-                                               agino));
+                                       do_warn(
+       _("would clear realtime summary inode %" PRIu64 "\n"),
+                                               ino);
                                }
                        } else if (!no_modify)  {
-                               do_warn(_("cleared inode %llu\n"),
-                                       XFS_AGINO_TO_INO(mp, agno, agino));
+                               do_warn(_("cleared inode %" PRIu64 "\n"),
+                                       ino);
                        } else  {
-                               do_warn(_("would have cleared inode %llu\n"),
-                                       XFS_AGINO_TO_INO(mp, agno, agino));
+                               do_warn(_("would have cleared inode %" PRIu64 "\n"),
+                                       ino);
                        }
+                       clear_inode_was_rl(ino_rec, irec_offset);
                }
 
+process_next:
                irec_offset++;
                ibuf_offset++;
                icnt++;
+               cluster_offset++;
 
-               if (icnt == XFS_IALLOC_INODES(mp) &&
+               if (icnt == mp->m_ialloc_inos &&
                                irec_offset == XFS_INODES_PER_CHUNK)  {
                        /*
                         * done! - finished up irec and block simultaneously
                         */
-                       if (dirty && !no_modify)
-                               libxfs_writebuf(bp, 0);
-                       else
-                               libxfs_putbuf(bp);
+                       for (bp_index = 0; bp_index < cluster_count; bp_index++) {
+                               if (!bplist[bp_index])
+                                       continue;
+
+                               pftrace("put/writebuf %p (%llu) in AG %d",
+                                       bplist[bp_index], (long long)
+                                       XFS_BUF_ADDR(bplist[bp_index]), agno);
 
-                       done = 1;
+                               if (dirty && !no_modify)
+                                       libxfs_writebuf(bplist[bp_index], 0);
+                               else
+                                       libxfs_putbuf(bplist[bp_index]);
+                       }
+                       free(bplist);
                        break;
                } else if (ibuf_offset == mp->m_sb.sb_inopblock)  {
                        /*
@@ -883,28 +952,8 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                        ibuf_offset = 0;
                        agbno++;
 
-                       PREPAIR_RW_WRITE_LOCK(&per_ag_lock[agno]);
-                       switch (state = get_agbno_state(mp, agno, agbno))  {
-                       case XR_E_INO:  /* already marked */
-                               break;
-                       case XR_E_UNKNOWN:
-                       case XR_E_FREE:
-                       case XR_E_FREE1:
-                               set_agbno_state(mp, agno, agbno, XR_E_INO);
-                               break;
-                       case XR_E_BAD_STATE:
-                               do_error(_("bad state in block map %d\n"),
-                                       state);
-                               break;
-                       default:
-                               set_agbno_state(mp, agno, agbno, XR_E_MULT);
-                               do_warn(_("inode block %llu multiply claimed, "
-                                         "state was %d\n"),
-                                       XFS_AGB_TO_FSB(mp, agno, agbno), state);
-                               break;
-                       }
-                       PREPAIR_RW_UNLOCK(&per_ag_lock[agno]);
-
+                       if (!is_inode_sparse(ino_rec, irec_offset))
+                               process_inode_agbno_state(mp, agno, agbno);
                } else if (irec_offset == XFS_INODES_PER_CHUNK)  {
                        /*
                         * get new irec (multiple chunks per block fs)
@@ -913,6 +962,10 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
                        ASSERT(ino_rec->ino_startnum == agino + 1);
                        irec_offset = 0;
                }
+               if (cluster_offset == inodes_per_cluster) {
+                       bp_index++;
+                       cluster_offset = 0;
+               }
        }
        return(0);
 }
@@ -930,16 +983,21 @@ process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
  * phase 4 after we've run through and set the bitmap once.
  */
 void
-process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
-               int ino_discovery, int check_dups, int extra_attr_check)
+process_aginodes(
+       xfs_mount_t             *mp,
+       prefetch_args_t         *pf_args,
+       xfs_agnumber_t          agno,
+       int                     ino_discovery,
+       int                     check_dups,
+       int                     extra_attr_check)
 {
-       int num_inos, bogus;
-       ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
-       ino_tree_node_t *ino_ra;
-
-       ino_ra = do_prefetch ? prefetch_inode_chunks(mp, agno, NULL) : NULL;
-
+       int                     num_inos, bogus;
+       ino_tree_node_t         *ino_rec, *first_ino_rec, *prev_ino_rec;
+#ifdef XR_PF_TRACE
+       int                     count;
+#endif
        first_ino_rec = ino_rec = findfirst_inode_rec(agno);
+
        while (ino_rec != NULL)  {
                /*
                 * paranoia - step through inode records until we step
@@ -950,8 +1008,7 @@ process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
                 * the next block before we call the processing routines.
                 */
                num_inos = XFS_INODES_PER_CHUNK;
-               while (num_inos < XFS_IALLOC_INODES(mp) && ino_rec != NULL)  {
-                       ASSERT(ino_rec != NULL);
+               while (num_inos < mp->m_ialloc_inos && ino_rec != NULL)  {
                        /*
                         * inodes chunks will always be aligned and sized
                         * correctly
@@ -960,13 +1017,20 @@ process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
                                num_inos += XFS_INODES_PER_CHUNK;
                }
 
-               ASSERT(num_inos == XFS_IALLOC_INODES(mp));
+               ASSERT(num_inos == mp->m_ialloc_inos);
 
-               if (do_prefetch && ino_ra && (first_ino_rec->ino_startnum >= ino_ra->ino_startnum))
-                       ino_ra = prefetch_inode_chunks(mp, agno, ino_ra);
+               if (pf_args) {
+                       sem_post(&pf_args->ra_count);
+#ifdef XR_PF_TRACE
+                       sem_getvalue(&pf_args->ra_count, &count);
+                       pftrace("processing inode chunk %p in AG %d (sem count = %d)",
+                               first_ino_rec, agno, count);
+#endif
+               }
 
                if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
-                               ino_discovery, check_dups, extra_attr_check, &bogus))  {
+                               ino_discovery, check_dups, extra_attr_check,
+                               &bogus))  {
                        /* XXX - i/o error, we've got a problem */
                        abort();
                }
@@ -985,19 +1049,20 @@ process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
                         */
                        num_inos = 0;
                        ino_rec = first_ino_rec;
-                       while (num_inos < XFS_IALLOC_INODES(mp) &&
+                       while (num_inos < mp->m_ialloc_inos &&
                                        ino_rec != NULL)  {
                                prev_ino_rec = ino_rec;
 
                                if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
                                        num_inos += XFS_INODES_PER_CHUNK;
 
-                               get_inode_rec(agno, prev_ino_rec);
+                               get_inode_rec(mp, agno, prev_ino_rec);
                                free_inode_rec(agno, prev_ino_rec);
                        }
 
                        first_ino_rec = ino_rec;
                }
+               PROG_RPT_INC(prog_rpt_done[agno], num_inos);
        }
 }
 
@@ -1059,14 +1124,14 @@ check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
                                        XFS_INODES_PER_CHUNK)
                                continue;
 
-                       if ((nrec = find_inode_rec(agno, agino)) == NULL)
+                       if ((nrec = find_inode_rec(mp, agno, agino)) == NULL)
                                if (!verify_aginum(mp, agno, agino))
                                        if (verify_aginode_chunk(mp, agno,
                                                        agino, &start))
                                                got_some = 1;
                }
 
-               get_uncertain_inode_rec(agno, irec);
+               get_uncertain_inode_rec(mp, agno, irec);
                free_inode_rec(agno, irec);
 
                irec = findfirst_uncertain_inode_rec(agno);
@@ -1149,7 +1214,7 @@ process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
                                        XFS_INODES_PER_CHUNK)
                                continue;
 
-                       if ((nrec = find_inode_rec(agno, agino)) != NULL)
+                       if ((nrec = find_inode_rec(mp, agno, agino)) != NULL)
                                continue;
 
                        /*
@@ -1168,7 +1233,7 @@ process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
                         * processing may add more records to the
                         * uncertain inode lists.
                         */
-                       if (process_inode_chunk(mp, agno, XFS_IALLOC_INODES(mp),
+                       if (process_inode_chunk(mp, agno, mp->m_ialloc_inos,
                                                nrec, 1, 0, 0, &bogus))  {
                                /* XXX - i/o error, we've got a problem */
                                abort();
@@ -1180,7 +1245,7 @@ process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
                 * now return the uncertain inode record to the free pool
                 * and pull another one off the list for processing
                 */
-               get_uncertain_inode_rec(agno, irec);
+               get_uncertain_inode_rec(mp, agno, irec);
                free_inode_rec(agno, irec);
 
                irec = findfirst_uncertain_inode_rec(agno);