xfs_repair: detect and fix padding fields that changed with nrext64

[thirdparty/xfsprogs-dev.git] / repair / phase6.c
diff --git a/repair/phase6.c b/repair/phase6.c

index f69afac90e82214a8c8e2c209a8ffdb1fd4cb9c0..c04b2e0999c0a181f93b6b63a6a7dcb5453b6fef 100644 (file)
--- a/repair/phase6.c
+++ b/repair/phase6.c
@@ -6,6 +6,7 @@
  
  #include "libxfs.h"
  #include "threads.h"
+#include "threads.h"
  #include "prefetch.h"
  #include "avl.h"
  #include "globals.h"
@@ -65,29 +66,29 @@ add_dotdot_update(
   * and whether their leaf entry has been seen. Also used for name
   * duplicate checking and rebuilding step if required.
   */
-typedef struct dir_hash_ent {
-       struct dir_hash_ent     *nextbyaddr;    /* next in addr bucket */
+struct dir_hash_ent {
         struct dir_hash_ent     *nextbyhash;    /* next in name bucket */
         struct dir_hash_ent     *nextbyorder;   /* next in order added */
         xfs_dahash_t            hashval;        /* hash value of name */
         uint32_t                address;        /* offset of data entry */
-       xfs_ino_t               inum;           /* inode num of entry */
+       xfs_ino_t               inum;           /* inode num of entry */
         short                   junkit;         /* name starts with / */
         short                   seen;           /* have seen leaf entry */
         struct xfs_name         name;
-} dir_hash_ent_t;
+       unsigned char           namebuf[];
+};
  
-typedef struct dir_hash_tab {
+struct dir_hash_tab {
         int                     size;           /* size of hash tables */
-       int                     names_duped;    /* 1 = ent names malloced */
-       dir_hash_ent_t          *first;         /* ptr to first added entry */
-       dir_hash_ent_t          *last;          /* ptr to last added entry */
-       dir_hash_ent_t          **byhash;       /* ptr to name hash buckets */
-       dir_hash_ent_t          **byaddr;       /* ptr to addr hash buckets */
-} dir_hash_tab_t;
+       struct dir_hash_ent     *first;         /* ptr to first added entry */
+       struct dir_hash_ent     *last;          /* ptr to last added entry */
+       struct dir_hash_ent     **byhash;       /* ptr to name hash buckets */
+#define HT_UNSEEN              1
+       struct radix_tree_root  byaddr;
+};
  
  #define        DIR_HASH_TAB_SIZE(n)    \
-       (sizeof(dir_hash_tab_t) + (sizeof(dir_hash_ent_t *) * (n) * 2))
+       (sizeof(struct dir_hash_tab) + (sizeof(struct dir_hash_ent *) * (n)))
  #define        DIR_HASH_FUNC(t,a)      ((a) % (t)->size)
  
  /*
@@ -154,8 +155,8 @@ dir_read_buf(
   */
  static int
  dir_hash_add(
-       xfs_mount_t             *mp,
-       dir_hash_tab_t          *hashtab,
+       struct xfs_mount        *mp,
+       struct dir_hash_tab     *hashtab,
         uint32_t                addr,
         xfs_ino_t               inum,
         int                     namelen,
@@ -163,21 +164,18 @@ dir_hash_add(
         uint8_t                 ftype)
  {
         xfs_dahash_t            hash = 0;
-       int                     byaddr;
         int                     byhash = 0;
-       dir_hash_ent_t          *p;
+       struct dir_hash_ent     *p;
         int                     dup;
         short                   junk;
         struct xfs_name         xname;
-
-       ASSERT(!hashtab->names_duped);
+       int                     error;
  
         xname.name = name;
         xname.len = namelen;
         xname.type = ftype;
  
         junk = name[0] == '/';
-       byaddr = DIR_HASH_FUNC(hashtab, addr);
         dup = 0;
  
         if (!junk) {
@@ -198,12 +196,23 @@ dir_hash_add(
                 }
         }
  
-       if ((p = malloc(sizeof(*p))) == NULL)
+       /*
+        * Allocate enough space for the hash entry and the name in a single
+        * allocation so we can store our own copy of the name for later use.
+        */
+       p = calloc(1, sizeof(*p) + namelen + 1);
+       if (!p)
                 do_error(_("malloc failed in dir_hash_add (%zu bytes)\n"),
                         sizeof(*p));
  
-       p->nextbyaddr = hashtab->byaddr[byaddr];
-       hashtab->byaddr[byaddr] = p;
+       error = radix_tree_insert(&hashtab->byaddr, addr, p);
+       if (error == EEXIST) {
+               do_warn(_("duplicate addrs %u in directory!\n"), addr);
+               free(p);
+               return 0;
+       }
+       radix_tree_tag_set(&hashtab->byaddr, addr, HT_UNSEEN);
+
         if (hashtab->last)
                 hashtab->last->nextbyorder = p;
         else
@@ -219,38 +228,38 @@ dir_hash_add(
         p->address = addr;
         p->inum = inum;
         p->seen = 0;
-       p->name = xname;
  
+       /* Set up the name in the region trailing the hash entry. */
+       memcpy(p->namebuf, name, namelen);
+       p->name.name = p->namebuf;
+       p->name.len = namelen;
+       p->name.type = ftype;
         return !dup;
  }
  
-/*
- * checks to see if any data entries are not in the leaf blocks
- */
-static int
-dir_hash_unseen(
-       dir_hash_tab_t  *hashtab)
+/* Mark an existing directory hashtable entry as junk. */
+static void
+dir_hash_junkit(
+       struct dir_hash_tab     *hashtab,
+       xfs_dir2_dataptr_t      addr)
  {
-       int             i;
-       dir_hash_ent_t  *p;
+       struct dir_hash_ent     *p;
  
-       for (i = 0; i < hashtab->size; i++) {
-               for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
-                       if (p->seen == 0)
-                               return 1;
-               }
-       }
-       return 0;
+       p = radix_tree_lookup(&hashtab->byaddr, addr);
+       assert(p != NULL);
+
+       p->junkit = 1;
+       p->namebuf[0] = '/';
  }
  
  static int
  dir_hash_check(
-       dir_hash_tab_t  *hashtab,
-       xfs_inode_t     *ip,
-       int             seeval)
+       struct dir_hash_tab     *hashtab,
+       struct xfs_inode        *ip,
+       int                     seeval)
  {
-       static char     *seevalstr[DIR_HASH_CK_TOTAL];
-       static int      done;
+       static char             *seevalstr[DIR_HASH_CK_TOTAL];
+       static int              done;
  
         if (!done) {
                 seevalstr[DIR_HASH_CK_OK] = _("ok");
@@ -262,7 +271,8 @@ dir_hash_check(
                 done = 1;
         }
  
-       if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab))
+       if (seeval == DIR_HASH_CK_OK &&
+           radix_tree_tagged(&hashtab->byaddr, HT_UNSEEN))
                 seeval = DIR_HASH_CK_NOLEAF;
         if (seeval == DIR_HASH_CK_OK)
                 return 0;
@@ -277,83 +287,92 @@ dir_hash_check(
  
  static void
  dir_hash_done(
-       dir_hash_tab_t  *hashtab)
+       struct dir_hash_tab     *hashtab)
  {
-       int             i;
-       dir_hash_ent_t  *n;
-       dir_hash_ent_t  *p;
+       int                     i;
+       struct dir_hash_ent     *n;
+       struct dir_hash_ent     *p;
  
         for (i = 0; i < hashtab->size; i++) {
-               for (p = hashtab->byaddr[i]; p; p = n) {
-                       n = p->nextbyaddr;
-                       if (hashtab->names_duped)
-                               free((void *)p->name.name);
+               for (p = hashtab->byhash[i]; p; p = n) {
+                       n = p->nextbyhash;
+                       radix_tree_delete(&hashtab->byaddr, p->address);
                         free(p);
                 }
         }
         free(hashtab);
  }
  
-static dir_hash_tab_t *
+/*
+ * Create a directory hash index structure based on the size of the directory we
+ * are about to try to repair. The size passed in is the size of the data
+ * segment of the directory in bytes, so we don't really know exactly how many
+ * entries are in it. Hence assume an entry size of around 64 bytes - that's a
+ * name length of 40+ bytes so should cover a most situations with really large
+ * directories.
+ */
+static struct dir_hash_tab *
  dir_hash_init(
-       xfs_fsize_t     size)
+       xfs_fsize_t             size)
  {
-       dir_hash_tab_t  *hashtab;
-       int             hsize;
+       struct dir_hash_tab     *hashtab = NULL;
+       int                     hsize;
  
-       hsize = size / (16 * 4);
-       if (hsize > 65536)
-               hsize = 63336;
-       else if (hsize < 16)
+       hsize = size / 64;
+       if (hsize < 16)
                 hsize = 16;
-       if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL)
+
+       /*
+        * Try to allocate as large a hash table as possible. Failure to
+        * allocate isn't fatal, it will just result in slower performance as we
+        * reduce the size of the table.
+        */
+       while (hsize >= 16) {
+               hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1);
+               if (hashtab)
+                       break;
+               hsize /= 2;
+       }
+       if (!hashtab)
                 do_error(_("calloc failed in dir_hash_init\n"));
         hashtab->size = hsize;
-       hashtab->byhash = (dir_hash_ent_t**)((char *)hashtab +
-               sizeof(dir_hash_tab_t));
-       hashtab->byaddr = (dir_hash_ent_t**)((char *)hashtab +
-               sizeof(dir_hash_tab_t) + sizeof(dir_hash_ent_t*) * hsize);
+       hashtab->byhash = (struct dir_hash_ent **)((char *)hashtab +
+               sizeof(struct dir_hash_tab));
+       INIT_RADIX_TREE(&hashtab->byaddr, 0);
         return hashtab;
  }
  
  static int
  dir_hash_see(
-       dir_hash_tab_t          *hashtab,
+       struct dir_hash_tab     *hashtab,
         xfs_dahash_t            hash,
         xfs_dir2_dataptr_t      addr)
  {
-       int                     i;
-       dir_hash_ent_t          *p;
-
-       i = DIR_HASH_FUNC(hashtab, addr);
-       for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
-               if (p->address != addr)
-                       continue;
-               if (p->seen)
-                       return DIR_HASH_CK_DUPLEAF;
-               if (p->junkit == 0 && p->hashval != hash)
-                       return DIR_HASH_CK_BADHASH;
-               p->seen = 1;
-               return DIR_HASH_CK_OK;
-       }
-       return DIR_HASH_CK_NODATA;
+       struct dir_hash_ent     *p;
+
+       p = radix_tree_lookup(&hashtab->byaddr, addr);
+       if (!p)
+               return DIR_HASH_CK_NODATA;
+       if (!radix_tree_tag_get(&hashtab->byaddr, addr, HT_UNSEEN))
+               return DIR_HASH_CK_DUPLEAF;
+       if (p->junkit == 0 && p->hashval != hash)
+               return DIR_HASH_CK_BADHASH;
+       radix_tree_tag_clear(&hashtab->byaddr, addr, HT_UNSEEN);
+       return DIR_HASH_CK_OK;
  }
  
  static void
  dir_hash_update_ftype(
-       dir_hash_tab_t          *hashtab,
+       struct dir_hash_tab     *hashtab,
         xfs_dir2_dataptr_t      addr,
         uint8_t                 ftype)
  {
-       int                     i;
-       dir_hash_ent_t          *p;
+       struct dir_hash_ent     *p;
  
-       i = DIR_HASH_FUNC(hashtab, addr);
-       for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) {
-               if (p->address != addr)
-                       continue;
-               p->name.type = ftype;
-       }
+       p = radix_tree_lookup(&hashtab->byaddr, addr);
+       if (!p)
+               return;
+       p->name.type = ftype;
  }
  
  /*
@@ -362,7 +381,7 @@ dir_hash_update_ftype(
   */
  static int
  dir_hash_see_all(
-       dir_hash_tab_t          *hashtab,
+       struct dir_hash_tab     *hashtab,
         xfs_dir2_leaf_entry_t   *ents,
         int                     count,
         int                     stale)
@@ -384,27 +403,6 @@ dir_hash_see_all(
         return j == stale ? DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE;
  }
  
-/*
- * Convert name pointers into locally allocated memory.
- * This must only be done after all the entries have been added.
- */
-static void
-dir_hash_dup_names(dir_hash_tab_t *hashtab)
-{
-       unsigned char           *name;
-       dir_hash_ent_t          *p;
-
-       if (hashtab->names_duped)
-               return;
-
-       for (p = hashtab->first; p; p = p->nextbyorder) {
-               name = malloc(p->name.len);
-               memcpy(name, p->name.name, p->name.len);
-               p->name.name = name;
-       }
-       hashtab->names_duped = 1;
-}
-
  /*
   * Given a block number in a fork, return the next valid block number (not a
   * hole).  If this is the last block number then NULLFILEOFF is returned.
@@ -430,11 +428,10 @@ bmap_next_offset(
                 return EIO;
         }
  
-       if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
-               error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
-               if (error)
-                       return error;
-       }
+        /* Read extent map. */
+       error = -libxfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+       if (error)
+               return error;
  
         bno = *bnop + 1;
         if (!libxfs_iext_lookup_extent(ip, &ip->i_df, bno, &icur, &got))
@@ -454,6 +451,22 @@ res_failed(
                 do_error(_("xfs_trans_reserve returned %d\n"), err);
  }
  
+static inline void
+reset_inode_fields(struct xfs_inode *ip)
+{
+       ip->i_projid = 0;
+       ip->i_disk_size = 0;
+       ip->i_nblocks = 0;
+       ip->i_extsize = 0;
+       ip->i_cowextsize = 0;
+       ip->i_flushiter = 0;
+       ip->i_forkoff = 0;
+       ip->i_diflags = 0;
+       ip->i_diflags2 = 0;
+       ip->i_crtime.tv_sec = 0;
+       ip->i_crtime.tv_nsec = 0;
+}
+
  static void
  mk_rbmino(xfs_mount_t *mp)
  {
@@ -482,7 +495,7 @@ mk_rbmino(xfs_mount_t *mp)
                         error);
         }
  
-       memset(&ip->i_d, 0, sizeof(ip->i_d));
+       reset_inode_fields(ip);
  
         VFS_I(ip)->i_mode = S_IFREG;
         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -492,9 +505,9 @@ mk_rbmino(xfs_mount_t *mp)
         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
  
         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
-       if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+       if (xfs_has_v3inodes(mp)) {
                 VFS_I(ip)->i_version = 1;
-               ip->i_d.di_flags2 = 0;
+               ip->i_diflags2 = 0;
                 times |= XFS_ICHGTIME_CREATE;
         }
         libxfs_trans_ichgtime(tp, ip, times);
@@ -502,11 +515,10 @@ mk_rbmino(xfs_mount_t *mp)
         /*
          * now the ifork
          */
-       ip->i_df.if_flags = XFS_IFEXTENTS;
         ip->i_df.if_bytes = 0;
         ip->i_df.if_u1.if_root = NULL;
  
-       ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+       ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
  
         /*
          * commit changes
@@ -558,7 +570,7 @@ mk_rbmino(xfs_mount_t *mp)
  static int
  fill_rbmino(xfs_mount_t *mp)
  {
-       xfs_buf_t       *bp;
+       struct xfs_buf  *bp;
         xfs_trans_t     *tp;
         xfs_inode_t     *ip;
         xfs_rtword_t    *bmp;
@@ -626,7 +638,7 @@ _("can't access block %" PRIu64 " (fsbno %" PRIu64 ") of realtime bitmap inode %
  static int
  fill_rsumino(xfs_mount_t *mp)
  {
-       xfs_buf_t       *bp;
+       struct xfs_buf  *bp;
         xfs_trans_t     *tp;
         xfs_inode_t     *ip;
         xfs_suminfo_t   *smp;
@@ -723,7 +735,7 @@ mk_rsumino(xfs_mount_t *mp)
                         error);
         }
  
-       memset(&ip->i_d, 0, sizeof(ip->i_d));
+       reset_inode_fields(ip);
  
         VFS_I(ip)->i_mode = S_IFREG;
         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -733,9 +745,9 @@ mk_rsumino(xfs_mount_t *mp)
         set_nlink(VFS_I(ip), 1);        /* account for sb ptr */
  
         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
-       if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+       if (xfs_has_v3inodes(mp)) {
                 VFS_I(ip)->i_version = 1;
-               ip->i_d.di_flags2 = 0;
+               ip->i_diflags2 = 0;
                 times |= XFS_ICHGTIME_CREATE;
         }
         libxfs_trans_ichgtime(tp, ip, times);
@@ -743,11 +755,10 @@ mk_rsumino(xfs_mount_t *mp)
         /*
          * now the ifork
          */
-       ip->i_df.if_flags = XFS_IFEXTENTS;
         ip->i_df.if_bytes = 0;
         ip->i_df.if_u1.if_root = NULL;
  
-       ip->i_d.di_size = mp->m_rsumsize;
+       ip->i_disk_size = mp->m_rsumsize;
  
         /*
          * commit changes
@@ -823,7 +834,7 @@ mk_root_dir(xfs_mount_t *mp)
         /*
          * take care of the core -- initialization from xfs_ialloc()
          */
-       memset(&ip->i_d, 0, sizeof(ip->i_d));
+       reset_inode_fields(ip);
  
         VFS_I(ip)->i_mode = mode|S_IFDIR;
         ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
@@ -833,9 +844,9 @@ mk_root_dir(xfs_mount_t *mp)
         set_nlink(VFS_I(ip), 2);        /* account for . and .. */
  
         times = XFS_ICHGTIME_CHG | XFS_ICHGTIME_MOD;
-       if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
+       if (xfs_has_v3inodes(mp)) {
                 VFS_I(ip)->i_version = 1;
-               ip->i_d.di_flags2 = 0;
+               ip->i_diflags2 = 0;
                 times |= XFS_ICHGTIME_CREATE;
         }
         libxfs_trans_ichgtime(tp, ip, times);
@@ -845,7 +856,6 @@ mk_root_dir(xfs_mount_t *mp)
         /*
          * now the ifork
          */
-       ip->i_df.if_flags = XFS_IFEXTENTS;
         ip->i_df.if_bytes = 0;
         ip->i_df.if_u1.if_root = NULL;
  
@@ -1057,9 +1067,7 @@ mv_orphanage(
                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
                                                   nres, 0, 0, &tp);
                         if (err)
-                               do_error(
-       _("space reservation failed (%d), filesystem may be out of space\n"),
-                                       err);
+                               res_failed(err);
  
                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
                         libxfs_trans_ijoin(tp, ino_p, 0);
@@ -1068,8 +1076,7 @@ mv_orphanage(
                                                 ino, nres);
                         if (err)
                                 do_error(
-       _("name create failed in %s (%d), filesystem may be out of space\n"),
-                                       ORPHANAGE, err);
+       _("name create failed in %s (%d)\n"), ORPHANAGE, err);
  
                         if (irec)
                                 add_inode_ref(irec, ino_offset);
@@ -1081,8 +1088,7 @@ mv_orphanage(
                                         orphanage_ino, nres);
                         if (err)
                                 do_error(
-       _("creation of .. entry failed (%d), filesystem may be out of space\n"),
-                                       err);
+       _("creation of .. entry failed (%d)\n"), err);
  
                         inc_nlink(VFS_I(ino_p));
                         libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE);
@@ -1094,9 +1100,7 @@ mv_orphanage(
                         err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_rename,
                                                   nres, 0, 0, &tp);
                         if (err)
-                               do_error(
-       _("space reservation failed (%d), filesystem may be out of space\n"),
-                                       err);
+                               res_failed(err);
  
                         libxfs_trans_ijoin(tp, orphanage_ip, 0);
                         libxfs_trans_ijoin(tp, ino_p, 0);
@@ -1106,8 +1110,7 @@ mv_orphanage(
                                                 ino, nres);
                         if (err)
                                 do_error(
-       _("name create failed in %s (%d), filesystem may be out of space\n"),
-                                       ORPHANAGE, err);
+       _("name create failed in %s (%d)\n"), ORPHANAGE, err);
  
                         if (irec)
                                 add_inode_ref(irec, ino_offset);
@@ -1125,8 +1128,7 @@ mv_orphanage(
                                                 nres);
                                 if (err)
                                         do_error(
-       _("name replace op failed (%d), filesystem may be out of space\n"),
-                                               err);
+       _("name replace op failed (%d)\n"), err);
                         }
  
                         err = -libxfs_trans_commit(tp);
@@ -1146,9 +1148,7 @@ mv_orphanage(
                 err = -libxfs_trans_alloc(mp, &M_RES(mp)->tr_remove,
                                           nres, 0, 0, &tp);
                 if (err)
-                       do_error(
-       _("space reservation failed (%d), filesystem may be out of space\n"),
-                               err);
+                       res_failed(err);
  
                 libxfs_trans_ijoin(tp, orphanage_ip, 0);
                 libxfs_trans_ijoin(tp, ino_p, 0);
@@ -1157,8 +1157,7 @@ mv_orphanage(
                                                 nres);
                 if (err)
                         do_error(
-       _("name create failed in %s (%d), filesystem may be out of space\n"),
-                               ORPHANAGE, err);
+       _("name create failed in %s (%d)\n"), ORPHANAGE, err);
                 ASSERT(err == 0);
  
                 set_nlink(VFS_I(ino_p), 1);
@@ -1237,19 +1236,19 @@ dir_binval(
  
  static void
  longform_dir2_rebuild(
-       xfs_mount_t             *mp,
+       struct xfs_mount        *mp,
         xfs_ino_t               ino,
-       xfs_inode_t             *ip,
-       ino_tree_node_t         *irec,
+       struct xfs_inode        *ip,
+       struct ino_tree_node    *irec,
         int                     ino_offset,
-       dir_hash_tab_t          *hashtab)
+       struct dir_hash_tab     *hashtab)
  {
         int                     error;
         int                     nres;
-       xfs_trans_t             *tp;
+       struct xfs_trans        *tp;
         xfs_fileoff_t           lastblock;
-       xfs_inode_t             pip;
-       dir_hash_ent_t          *p;
+       struct xfs_inode        pip;
+       struct dir_hash_ent     *p;
         int                     done = 0;
  
         /*
@@ -1341,8 +1340,7 @@ longform_dir2_rebuild(
                                                 nres);
                 if (error) {
                         do_warn(
-_("name create failed in ino %" PRIu64 " (%d), filesystem may be out of space\n"),
-                               ino, error);
+_("name create failed in ino %" PRIu64 " (%d)\n"), ino, error);
                         goto out_bmap_cancel;
                 }
  
@@ -1382,6 +1380,7 @@ dir2_kill_block(
                 res_failed(error);
         libxfs_trans_ijoin(tp, ip, 0);
         libxfs_trans_bjoin(tp, bp);
+       libxfs_trans_bhold(tp, bp);
         memset(&args, 0, sizeof(args));
         args.dp = ip;
         args.trans = tp;
@@ -1401,27 +1400,68 @@ dir2_kill_block(
  _("directory shrink failed (%d)\n"), error);
  }
  
+static inline void
+check_longform_ftype(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
+       xfs_dir2_data_entry_t   *dep,
+       ino_tree_node_t         *irec,
+       int                     ino_offset,
+       struct dir_hash_tab     *hashtab,
+       xfs_dir2_dataptr_t      addr,
+       struct xfs_da_args      *da,
+       struct xfs_buf          *bp)
+{
+       xfs_ino_t               inum = be64_to_cpu(dep->inumber);
+       uint8_t                 dir_ftype;
+       uint8_t                 ino_ftype;
+
+       if (!xfs_has_ftype(mp))
+               return;
+
+       dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
+       ino_ftype = get_inode_ftype(irec, ino_offset);
+
+       if (dir_ftype == ino_ftype)
+               return;
+
+       if (no_modify) {
+               do_warn(
+_("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
+                       dir_ftype, ino_ftype,
+                       ip->i_ino, inum);
+               return;
+       }
+
+       do_warn(
+_("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
+               dir_ftype, ino_ftype,
+               ip->i_ino, inum);
+       libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
+       libxfs_dir2_data_log_entry(da, bp, dep);
+       dir_hash_update_ftype(hashtab, addr, ino_ftype);
+}
+
  /*
   * process a data block, also checks for .. entry
   * and corrects it to match what we think .. should be
   */
  static void
  longform_dir2_entry_check_data(
-       xfs_mount_t             *mp,
-       xfs_inode_t             *ip,
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
         int                     *num_illegal,
         int                     *need_dot,
-       ino_tree_node_t         *current_irec,
+       struct ino_tree_node    *current_irec,
         int                     current_ino_offset,
-       struct xfs_buf          **bpp,
-       dir_hash_tab_t          *hashtab,
+       struct xfs_buf          *bp,
+       struct dir_hash_tab     *hashtab,
         freetab_t               **freetabp,
         xfs_dablk_t             da_bno,
         int                     isblock)
  {
         xfs_dir2_dataptr_t      addr;
         xfs_dir2_leaf_entry_t   *blp;
-       struct xfs_buf          *bp;
         xfs_dir2_block_tail_t   *btp;
         struct xfs_dir2_data_hdr *d;
         xfs_dir2_db_t           db;
@@ -1452,7 +1492,6 @@ longform_dir2_entry_check_data(
         };
  
  
-       bp = *bpp;
         d = bp->b_addr;
         ptr = (char *)d + mp->m_dir_geo->data_entry_offset;
         nbad = 0;
@@ -1465,13 +1504,13 @@ longform_dir2_entry_check_data(
                 endptr = (char *)blp;
                 if (endptr > (char *)btp)
                         endptr = (char *)btp;
-               if (xfs_sb_version_hascrc(&mp->m_sb))
+               if (xfs_has_crc(mp))
                         wantmagic = XFS_DIR3_BLOCK_MAGIC;
                 else
                         wantmagic = XFS_DIR2_BLOCK_MAGIC;
         } else {
                 endptr = (char *)d + mp->m_dir_geo->blksize;
-               if (xfs_sb_version_hascrc(&mp->m_sb))
+               if (xfs_has_crc(mp))
                         wantmagic = XFS_DIR3_DATA_MAGIC;
                 else
                         wantmagic = XFS_DIR2_DATA_MAGIC;
@@ -1553,10 +1592,8 @@ longform_dir2_entry_check_data(
                         dir2_kill_block(mp, ip, da_bno, bp);
                 } else {
                         do_warn(_("would junk block\n"));
-                       libxfs_buf_relse(bp);
                 }
                 freetab->ents[db].v = NULLDATAOFF;
-               *bpp = NULL;
                 return;
         }
  
@@ -1737,10 +1774,16 @@ longform_dir2_entry_check_data(
                                 if (entry_junked(
         _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is not in the the first block"), fname,
                                                 inum, ip->i_ino)) {
+                                       dir_hash_junkit(hashtab, addr);
                                         dep->name[0] = '/';
                                         libxfs_dir2_data_log_entry(&da, bp, dep);
                                 }
                         }
+
+                       if (!nbad)
+                               check_longform_ftype(mp, ip, dep, irec,
+                                               ino_offset, hashtab, addr, &da,
+                                               bp);
                         continue;
                 }
                 ASSERT(no_modify || libxfs_verify_dir_ino(mp, inum));
@@ -1764,10 +1807,16 @@ longform_dir2_entry_check_data(
                                 if (entry_junked(
         _("entry \"%s\" in dir %" PRIu64 " is not the first entry"),
                                                 fname, inum, ip->i_ino)) {
+                                       dir_hash_junkit(hashtab, addr);
                                         dep->name[0] = '/';
                                         libxfs_dir2_data_log_entry(&da, bp, dep);
                                 }
                         }
+
+                       if (!nbad)
+                               check_longform_ftype(mp, ip, dep, irec,
+                                               ino_offset, hashtab, addr, &da,
+                                               bp);
                         *need_dot = 0;
                         continue;
                 }
@@ -1778,31 +1827,8 @@ longform_dir2_entry_check_data(
                         continue;
  
                 /* validate ftype field if supported */
-               if (xfs_sb_version_hasftype(&mp->m_sb)) {
-                       uint8_t dir_ftype;
-                       uint8_t ino_ftype;
-
-                       dir_ftype = libxfs_dir2_data_get_ftype(mp, dep);
-                       ino_ftype = get_inode_ftype(irec, ino_offset);
-
-                       if (dir_ftype != ino_ftype) {
-                               if (no_modify) {
-                                       do_warn(
-       _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
-                                               dir_ftype, ino_ftype,
-                                               ip->i_ino, inum);
-                               } else {
-                                       do_warn(
-       _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"),
-                                               dir_ftype, ino_ftype,
-                                               ip->i_ino, inum);
-                                       libxfs_dir2_data_put_ftype(mp, dep, ino_ftype);
-                                       libxfs_dir2_data_log_entry(&da, bp, dep);
-                                       dir_hash_update_ftype(hashtab, addr,
-                                                             ino_ftype);
-                               }
-                       }
-               }
+               check_longform_ftype(mp, ip, dep, irec, ino_offset, hashtab,
+                               addr, &da, bp);
  
                 /*
                  * check easy case first, regular inode, just bump
@@ -1852,6 +1878,7 @@ _("entry \"%s\" in dir inode %" PRIu64 " inconsistent with .. value (%" PRIu64 "
                                 orphanage_ino = 0;
                         nbad++;
                         if (!no_modify)  {
+                               dir_hash_junkit(hashtab, addr);
                                 dep->name[0] = '/';
                                 libxfs_dir2_data_log_entry(&da, bp, dep);
                                 if (verbose)
@@ -1895,21 +1922,21 @@ __check_dir3_header(
         if (be64_to_cpu(owner) != ino) {
                 do_warn(
  _("expected owner inode %" PRIu64 ", got %llu, directory block %" PRIu64 "\n"),
-                       ino, (unsigned long long)be64_to_cpu(owner), bp->b_bn);
+                       ino, (unsigned long long)be64_to_cpu(owner), xfs_buf_daddr(bp));
                 return 1;
         }
         /* verify block number */
-       if (be64_to_cpu(blkno) != bp->b_bn) {
+       if (be64_to_cpu(blkno) != xfs_buf_daddr(bp)) {
                 do_warn(
  _("expected block %" PRIu64 ", got %llu, directory inode %" PRIu64 "\n"),
-                       bp->b_bn, (unsigned long long)be64_to_cpu(blkno), ino);
+                       xfs_buf_daddr(bp), (unsigned long long)be64_to_cpu(blkno), ino);
                 return 1;
         }
         /* verify uuid */
         if (platform_uuid_compare(uuid, &mp->m_sb.sb_meta_uuid) != 0) {
                 do_warn(
  _("wrong FS UUID, directory inode %" PRIu64 " block %" PRIu64 "\n"),
-                       ino, bp->b_bn);
+                       ino, xfs_buf_daddr(bp));
                 return 1;
         }
  
@@ -1945,10 +1972,10 @@ check_dir3_header(
   */
  static int
  longform_dir2_check_leaf(
-       xfs_mount_t             *mp,
-       xfs_inode_t             *ip,
-       dir_hash_tab_t          *hashtab,
-       freetab_t               *freetab)
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
+       struct dir_hash_tab     *hashtab,
+       struct freetab          *freetab)
  {
         int                     badtail;
         __be16                  *bestsp;
@@ -2030,10 +2057,10 @@ longform_dir2_check_leaf(
   */
  static int
  longform_dir2_check_node(
-       xfs_mount_t             *mp,
-       xfs_inode_t             *ip,
-       dir_hash_tab_t          *hashtab,
-       freetab_t               *freetab)
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
+       struct dir_hash_tab     *hashtab,
+       struct freetab          *freetab)
  {
         struct xfs_buf          *bp;
         xfs_dablk_t             da_bno;
@@ -2205,47 +2232,41 @@ longform_dir2_check_node(
   * (ie. get libxfs to do all the grunt work)
   */
  static void
-longform_dir2_entry_check(xfs_mount_t  *mp,
-                       xfs_ino_t       ino,
-                       xfs_inode_t     *ip,
-                       int             *num_illegal,
-                       int             *need_dot,
-                       ino_tree_node_t *irec,
-                       int             ino_offset,
-                       dir_hash_tab_t  *hashtab)
+longform_dir2_entry_check(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino,
+       struct xfs_inode        *ip,
+       int                     *num_illegal,
+       int                     *need_dot,
+       struct ino_tree_node    *irec,
+       int                     ino_offset,
+       struct dir_hash_tab     *hashtab)
  {
-       struct xfs_buf          **bplist;
+       struct xfs_buf          *bp = NULL;
         xfs_dablk_t             da_bno;
         freetab_t               *freetab;
-       int                     num_bps;
         int                     i;
         int                     isblock;
         int                     isleaf;
         xfs_fileoff_t           next_da_bno;
         int                     seeval;
         int                     fixit = 0;
-       xfs_dir2_db_t           db;
         struct xfs_da_args      args;
  
         *need_dot = 1;
-       freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
+       freetab = malloc(FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
         if (!freetab) {
                 do_error(_("malloc failed in %s (%" PRId64 " bytes)\n"),
                         __func__,
-                       FREETAB_SIZE(ip->i_d.di_size / mp->m_dir_geo->blksize));
+                       FREETAB_SIZE(ip->i_disk_size / mp->m_dir_geo->blksize));
                 exit(1);
         }
-       freetab->naents = ip->i_d.di_size / mp->m_dir_geo->blksize;
+       freetab->naents = ip->i_disk_size / mp->m_dir_geo->blksize;
         freetab->nents = 0;
         for (i = 0; i < freetab->naents; i++) {
                 freetab->ents[i].v = NULLDATAOFF;
                 freetab->ents[i].s = 0;
         }
-       num_bps = freetab->naents;
-       bplist = calloc(num_bps, sizeof(struct xfs_buf*));
-       if (!bplist)
-               do_error(_("calloc failed in %s (%zu bytes)\n"),
-                       __func__, num_bps * sizeof(struct xfs_buf*));
  
         /* is this a block, leaf, or node directory? */
         args.dp = ip;
@@ -2274,28 +2295,12 @@ longform_dir2_entry_check(xfs_mount_t   *mp,
                         break;
                 }
  
-               db = xfs_dir2_da_to_db(mp->m_dir_geo, da_bno);
-               if (db >= num_bps) {
-                       int last_size = num_bps;
-
-                       /* more data blocks than expected */
-                       num_bps = db + 1;
-                       bplist = realloc(bplist, num_bps * sizeof(struct xfs_buf*));
-                       if (!bplist)
-                               do_error(_("realloc failed in %s (%zu bytes)\n"),
-                                       __func__,
-                                       num_bps * sizeof(struct xfs_buf*));
-                       /* Initialize the new elements */
-                       for (i = last_size; i < num_bps; i++)
-                               bplist[i] = NULL;
-               }
-
                 if (isblock)
                         ops = &xfs_dir3_block_buf_ops;
                 else
                         ops = &xfs_dir3_data_buf_ops;
  
-               error = dir_read_buf(ip, da_bno, &bplist[db], ops, &fixit);
+               error = dir_read_buf(ip, da_bno, &bp, ops, &fixit);
                 if (error) {
                         do_warn(
         _("can't read data block %u for directory inode %" PRIu64 " error %d\n"),
@@ -2315,21 +2320,25 @@ longform_dir2_entry_check(xfs_mount_t   *mp,
                 }
  
                 /* check v5 metadata */
-               d = bplist[db]->b_addr;
+               d = bp->b_addr;
                 if (be32_to_cpu(d->magic) == XFS_DIR3_BLOCK_MAGIC ||
                     be32_to_cpu(d->magic) == XFS_DIR3_DATA_MAGIC) {
-                       struct xfs_buf           *bp = bplist[db];
-
                         error = check_dir3_header(mp, bp, ino);
                         if (error) {
                                 fixit++;
+                               if (isblock)
+                                       goto out_fix;
                                 continue;
                         }
                 }
  
                 longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot,
-                               irec, ino_offset, &bplist[db], hashtab,
+                               irec, ino_offset, bp, hashtab,
                                 &freetab, da_bno, isblock);
+               if (isblock)
+                       break;
+
+               libxfs_buf_relse(bp);
         }
         fixit |= (*num_illegal != 0) || dir2_is_badino(ino) || *need_dot;
  
@@ -2340,7 +2349,7 @@ longform_dir2_entry_check(xfs_mount_t     *mp,
                         xfs_dir2_block_tail_t   *btp;
                         xfs_dir2_leaf_entry_t   *blp;
  
-                       block = bplist[0]->b_addr;
+                       block = bp->b_addr;
                         btp = xfs_dir2_block_tail_p(mp->m_dir_geo, block);
                         blp = xfs_dir2_block_leaf_p(btp);
                         seeval = dir_hash_see_all(hashtab, blp,
@@ -2357,11 +2366,10 @@ longform_dir2_entry_check(xfs_mount_t   *mp,
                 }
         }
  out_fix:
+       if (isblock && bp)
+               libxfs_buf_relse(bp);
+
         if (!no_modify && (fixit || dotdot_update)) {
-               dir_hash_dup_names(hashtab);
-               for (i = 0; i < num_bps; i++)
-                       if (bplist[i])
-                               libxfs_buf_relse(bplist[i]);
                 longform_dir2_rebuild(mp, ino, ip, irec, ino_offset, hashtab);
                 *num_illegal = 0;
                 *need_dot = 0;
@@ -2369,12 +2377,8 @@ out_fix:
                 if (fixit || dotdot_update)
                         do_warn(
         _("would rebuild directory inode %" PRIu64 "\n"), ino);
-               for (i = 0; i < num_bps; i++)
-                       if (bplist[i])
-                               libxfs_buf_relse(bplist[i]);
         }
  
-       free(bplist);
         free(freetab);
  }
  
@@ -2439,13 +2443,14 @@ shortform_dir2_junk(
  }
  
  static void
-shortform_dir2_entry_check(xfs_mount_t *mp,
-                       xfs_ino_t       ino,
-                       xfs_inode_t     *ip,
-                       int             *ino_dirty,
-                       ino_tree_node_t *current_irec,
-                       int             current_ino_offset,
-                       dir_hash_tab_t  *hashtab)
+shortform_dir2_entry_check(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino,
+       struct xfs_inode        *ip,
+       int                     *ino_dirty,
+       struct ino_tree_node    *current_irec,
+       int                     current_ino_offset,
+       struct dir_hash_tab     *hashtab)
  {
         xfs_ino_t               lino;
         xfs_ino_t               parent;
@@ -2469,7 +2474,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
         bytes_deleted = 0;
  
         max_size = ifp->if_bytes;
-       ASSERT(ip->i_d.di_size <= ifp->if_bytes);
+       ASSERT(ip->i_disk_size <= ifp->if_bytes);
  
         /*
          * if just rebuild a directory due to a "..", update and return
@@ -2533,7 +2538,7 @@ shortform_dir2_entry_check(xfs_mount_t    *mp,
                         bad_sfnamelen = 1;
  
                         if (i == sfp->count - 1)  {
-                               namelen = ip->i_d.di_size -
+                               namelen = ip->i_disk_size -
                                         ((intptr_t) &sfep->name[0] -
                                          (intptr_t) sfp);
                         } else  {
@@ -2545,11 +2550,11 @@ shortform_dir2_entry_check(xfs_mount_t  *mp,
                         }
                 } else if (no_modify && (intptr_t) sfep - (intptr_t) sfp +
                                 + libxfs_dir2_sf_entsize(mp, sfp, sfep->namelen)
-                               > ip->i_d.di_size)  {
+                               > ip->i_disk_size)  {
                         bad_sfnamelen = 1;
  
                         if (i == sfp->count - 1)  {
-                               namelen = ip->i_d.di_size -
+                               namelen = ip->i_disk_size -
                                         ((intptr_t) &sfep->name[0] -
                                          (intptr_t) sfp);
                         } else  {
@@ -2696,7 +2701,7 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
                 }
  
                 /* validate ftype field if supported */
-               if (xfs_sb_version_hasftype(&mp->m_sb)) {
+               if (xfs_has_ftype(mp)) {
                         uint8_t dir_ftype;
                         uint8_t ino_ftype;
  
@@ -2767,17 +2772,17 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
         if (*ino_dirty && bytes_deleted > 0)  {
                 ASSERT(!no_modify);
                 libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK);
-               ip->i_d.di_size -= bytes_deleted;
+               ip->i_disk_size -= bytes_deleted;
         }
  
-       if (ip->i_d.di_size != ip->i_df.if_bytes)  {
+       if (ip->i_disk_size != ip->i_df.if_bytes)  {
                 ASSERT(ip->i_df.if_bytes == (xfs_fsize_t)
                                 ((intptr_t) next_sfep - (intptr_t) sfp));
-               ip->i_d.di_size = (xfs_fsize_t)
+               ip->i_disk_size = (xfs_fsize_t)
                                 ((intptr_t) next_sfep - (intptr_t) sfp);
                 do_warn(
         _("setting size to %" PRId64 " bytes to reflect junked entries\n"),
-                       ip->i_d.di_size);
+                       ip->i_disk_size);
                 *ino_dirty = 1;
         }
  }
@@ -2787,15 +2792,15 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"),
   */
  static void
  process_dir_inode(
-       xfs_mount_t             *mp,
+       struct xfs_mount        *mp,
         xfs_agnumber_t          agno,
-       ino_tree_node_t         *irec,
+       struct ino_tree_node    *irec,
         int                     ino_offset)
  {
         xfs_ino_t               ino;
-       xfs_inode_t             *ip;
-       xfs_trans_t             *tp;
-       dir_hash_tab_t          *hashtab;
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
+       struct dir_hash_tab     *hashtab;
         int                     need_dot;
         int                     dirty, num_illegal, error, nres;
  
@@ -2851,7 +2856,7 @@ process_dir_inode(
  
         add_inode_refchecked(irec, ino_offset);
  
-       hashtab = dir_hash_init(ip->i_d.di_size);
+       hashtab = dir_hash_init(ip->i_disk_size);
  
         /*
          * look for bogus entries
@@ -3105,20 +3110,44 @@ check_for_orphaned_inodes(
  }
  
  static void
-traverse_function(
+do_dir_inode(
         struct workqueue        *wq,
-       xfs_agnumber_t          agno,
+       xfs_agnumber_t          agno,
         void                    *arg)
  {
-       ino_tree_node_t         *irec;
+       struct ino_tree_node    *irec = arg;
         int                     i;
+
+       for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
+               if (inode_isadir(irec, i))
+                       process_dir_inode(wq->wq_ctx, agno, irec, i);
+       }
+}
+
+static void
+traverse_function(
+       struct workqueue        *wq,
+       xfs_agnumber_t          agno,
+       void                    *arg)
+{
+       struct ino_tree_node    *irec;
         prefetch_args_t         *pf_args = arg;
+       struct workqueue        lwq;
+       struct xfs_mount        *mp = wq->wq_ctx;
  
         wait_for_inode_prefetch(pf_args);
  
         if (verbose)
                 do_log(_("        - agno = %d\n"), agno);
  
+       /*
+        * The more AGs we have in flight at once, the fewer processing threads
+        * per AG. This means we don't overwhelm the machine with hundreds of
+        * threads when we start acting on lots of AGs at once. We just want
+        * enough that we can keep multiple CPUs busy across multiple AGs.
+        */
+       workqueue_create_bound(&lwq, mp, ag_stride, 1000);
+
         for (irec = findfirst_inode_rec(agno); irec; irec = next_ino_rec(irec)) {
                 if (irec->ino_isa_dir == 0)
                         continue;
@@ -3126,18 +3155,19 @@ traverse_function(
                 if (pf_args) {
                         sem_post(&pf_args->ra_count);
  #ifdef XR_PF_TRACE
+                       {
+                       int     i;
                         sem_getvalue(&pf_args->ra_count, &i);
                         pftrace(
                 "processing inode chunk %p in AG %d (sem count = %d)",
                                 irec, agno, i);
+                       }
  #endif
                 }
  
-               for (i = 0; i < XFS_INODES_PER_CHUNK; i++)  {
-                       if (inode_isadir(irec, i))
-                               process_dir_inode(wq->wq_ctx, agno, irec, i);
-               }
+               queue_work(&lwq, do_dir_inode, agno, irec);
         }
+       destroy_work_queue(&lwq);
         cleanup_inode_prefetch(pf_args);
  }
  
@@ -3165,7 +3195,7 @@ static void
  traverse_ags(
         struct xfs_mount        *mp)
  {
-       do_inode_prefetch(mp, 0, traverse_function, false, true);
+       do_inode_prefetch(mp, ag_stride, traverse_function, false, true);
  }
  
  void